Merge "Add parallel-deblocking experiment" into nextgenv2
diff --git a/aom_dsp/aom_dsp.mk b/aom_dsp/aom_dsp.mk
index 4735199..eebdc0c 100644
--- a/aom_dsp/aom_dsp.mk
+++ b/aom_dsp/aom_dsp.mk
@@ -191,6 +191,7 @@
endif # CONFIG_AOM_HIGHBITDEPTH
DSP_SRCS-yes += txfm_common.h
+DSP_SRCS-yes += x86/txfm_common_intrin.h
DSP_SRCS-$(HAVE_SSE2) += x86/txfm_common_sse2.h
DSP_SRCS-$(HAVE_MSA) += mips/txfm_macros_msa.h
# forward transform
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 6d873bc..d6fa90b 100644
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -701,6 +701,34 @@
#
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct4x4 sse2/;
+
+ add_proto qw/void aom_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct4x4_1 sse2/;
+
+ add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct8x8 sse2/;
+
+ add_proto qw/void aom_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct8x8_1 sse2/;
+
+ add_proto qw/void aom_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct16x16 sse2/;
+
+ add_proto qw/void aom_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct16x16_1 sse2 avx2/;
+
+ add_proto qw/void aom_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct32x32 sse2 avx2/;
+
+ add_proto qw/void aom_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct32x32_rd sse2 avx2/;
+
+ add_proto qw/void aom_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct32x32_1 sse2 avx2/;
+
+ # High bit depth
add_proto qw/void aom_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/aom_highbd_fdct4x4 sse2/;
@@ -724,33 +752,34 @@
add_proto qw/void aom_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/aom_highbd_fdct32x32_1/;
- } # CONFIG_AOM_HIGHBITDEPTH
- add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct4x4 sse2 msa/;
+ } else {
+ add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct4x4 sse2 msa/;
- add_proto qw/void aom_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct4x4_1 sse2/;
+ add_proto qw/void aom_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct4x4_1 sse2/;
- add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct8x8 sse2 neon msa/, "$ssse3_x86_64";
+ add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct8x8 sse2 neon msa/, "$ssse3_x86_64";
- add_proto qw/void aom_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct8x8_1 sse2 neon msa/;
+ add_proto qw/void aom_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct8x8_1 sse2 neon msa/;
- add_proto qw/void aom_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct16x16 sse2 msa/;
+ add_proto qw/void aom_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct16x16 sse2 msa/;
- add_proto qw/void aom_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct16x16_1 sse2 avx2 msa/;
+ add_proto qw/void aom_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct16x16_1 sse2 avx2 msa/;
- add_proto qw/void aom_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct32x32 sse2 avx2 msa/;
+ add_proto qw/void aom_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct32x32 sse2 avx2 msa/;
- add_proto qw/void aom_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct32x32_rd sse2 avx2 msa/;
+ add_proto qw/void aom_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct32x32_rd sse2 avx2 msa/;
- add_proto qw/void aom_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct32x32_1 sse2 avx2 msa/;
+ add_proto qw/void aom_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/aom_fdct32x32_1 sse2 avx2 msa/;
+ } # CONFIG_AOM_HIGHBITDEPTH
} # CONFIG_AV1_ENCODER
#
diff --git a/aom_dsp/x86/fwd_dct32x32_impl_avx2.h b/aom_dsp/x86/fwd_dct32x32_impl_avx2.h
index 8b136e7..2167395 100644
--- a/aom_dsp/x86/fwd_dct32x32_impl_avx2.h
+++ b/aom_dsp/x86/fwd_dct32x32_impl_avx2.h
@@ -12,6 +12,7 @@
#include <immintrin.h> // AVX2
#include "aom_dsp/txfm_common.h"
+#include "aom_dsp/x86/txfm_common_intrin.h"
#include "aom_dsp/x86/txfm_common_avx2.h"
#if FDCT32x32_HIGH_PRECISION
@@ -31,7 +32,19 @@
}
#endif
-void FDCT32x32_2D_AVX2(const int16_t *input, int16_t *output_org, int stride) {
+#ifndef STORE_COEFF_FUNC
+#define STORE_COEFF_FUNC
+// Writes the low 128-bit half of *coeff to curr and the high 128-bit half to
+// next, each through storeu_output().
+static void store_coeff(const __m256i *coeff, tran_low_t *curr,
+ tran_low_t *next) {
+ const __m128i lo_half = _mm256_castsi256_si128(*coeff);
+ const __m128i hi_half = _mm256_extractf128_si256(*coeff, 1);
+ storeu_output(&lo_half, curr);
+ storeu_output(&hi_half, next);
+}
+#endif
+
+void FDCT32x32_2D_AVX2(const int16_t *input, tran_low_t *output_org,
+ int stride) {
// Calculate pre-multiplied strides
const int str1 = stride;
const int str2 = 2 * stride;
@@ -2842,13 +2855,14 @@
{
int transpose_block;
int16_t *output_currStep, *output_nextStep;
- if (0 == pass) {
- output_currStep = &intermediate[column_start * 32];
- output_nextStep = &intermediate[(column_start + 8) * 32];
- } else {
- output_currStep = &output_org[column_start * 32];
- output_nextStep = &output_org[(column_start + 8) * 32];
- }
+ tran_low_t *curr_out, *next_out;
+ // Pass 0
+ output_currStep = &intermediate[column_start * 32];
+ output_nextStep = &intermediate[(column_start + 8) * 32];
+ // Pass 1
+ curr_out = &output_org[column_start * 32];
+ next_out = &output_org[(column_start + 8) * 32];
+
for (transpose_block = 0; transpose_block < 4; ++transpose_block) {
__m256i *this_out = &out[8 * transpose_block];
// 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
@@ -2948,44 +2962,58 @@
tr2_6 = _mm256_srai_epi16(tr2_6, 2);
tr2_7 = _mm256_srai_epi16(tr2_7, 2);
}
- // Note: even though all these stores are aligned, using the aligned
- // intrinsic make the code slightly slower.
- _mm_storeu_si128((__m128i *)(output_currStep + 0 * 32),
- _mm256_castsi256_si128(tr2_0));
- _mm_storeu_si128((__m128i *)(output_currStep + 1 * 32),
- _mm256_castsi256_si128(tr2_1));
- _mm_storeu_si128((__m128i *)(output_currStep + 2 * 32),
- _mm256_castsi256_si128(tr2_2));
- _mm_storeu_si128((__m128i *)(output_currStep + 3 * 32),
- _mm256_castsi256_si128(tr2_3));
- _mm_storeu_si128((__m128i *)(output_currStep + 4 * 32),
- _mm256_castsi256_si128(tr2_4));
- _mm_storeu_si128((__m128i *)(output_currStep + 5 * 32),
- _mm256_castsi256_si128(tr2_5));
- _mm_storeu_si128((__m128i *)(output_currStep + 6 * 32),
- _mm256_castsi256_si128(tr2_6));
- _mm_storeu_si128((__m128i *)(output_currStep + 7 * 32),
- _mm256_castsi256_si128(tr2_7));
+ if (0 == pass) {
+ // Note: even though all these stores are aligned, using the aligned
+ // intrinsic makes the code slightly slower.
+ _mm_storeu_si128((__m128i *)(output_currStep + 0 * 32),
+ _mm256_castsi256_si128(tr2_0));
+ _mm_storeu_si128((__m128i *)(output_currStep + 1 * 32),
+ _mm256_castsi256_si128(tr2_1));
+ _mm_storeu_si128((__m128i *)(output_currStep + 2 * 32),
+ _mm256_castsi256_si128(tr2_2));
+ _mm_storeu_si128((__m128i *)(output_currStep + 3 * 32),
+ _mm256_castsi256_si128(tr2_3));
+ _mm_storeu_si128((__m128i *)(output_currStep + 4 * 32),
+ _mm256_castsi256_si128(tr2_4));
+ _mm_storeu_si128((__m128i *)(output_currStep + 5 * 32),
+ _mm256_castsi256_si128(tr2_5));
+ _mm_storeu_si128((__m128i *)(output_currStep + 6 * 32),
+ _mm256_castsi256_si128(tr2_6));
+ _mm_storeu_si128((__m128i *)(output_currStep + 7 * 32),
+ _mm256_castsi256_si128(tr2_7));
- _mm_storeu_si128((__m128i *)(output_nextStep + 0 * 32),
- _mm256_extractf128_si256(tr2_0, 1));
- _mm_storeu_si128((__m128i *)(output_nextStep + 1 * 32),
- _mm256_extractf128_si256(tr2_1, 1));
- _mm_storeu_si128((__m128i *)(output_nextStep + 2 * 32),
- _mm256_extractf128_si256(tr2_2, 1));
- _mm_storeu_si128((__m128i *)(output_nextStep + 3 * 32),
- _mm256_extractf128_si256(tr2_3, 1));
- _mm_storeu_si128((__m128i *)(output_nextStep + 4 * 32),
- _mm256_extractf128_si256(tr2_4, 1));
- _mm_storeu_si128((__m128i *)(output_nextStep + 5 * 32),
- _mm256_extractf128_si256(tr2_5, 1));
- _mm_storeu_si128((__m128i *)(output_nextStep + 6 * 32),
- _mm256_extractf128_si256(tr2_6, 1));
- _mm_storeu_si128((__m128i *)(output_nextStep + 7 * 32),
- _mm256_extractf128_si256(tr2_7, 1));
- // Process next 8x8
- output_currStep += 8;
- output_nextStep += 8;
+ _mm_storeu_si128((__m128i *)(output_nextStep + 0 * 32),
+ _mm256_extractf128_si256(tr2_0, 1));
+ _mm_storeu_si128((__m128i *)(output_nextStep + 1 * 32),
+ _mm256_extractf128_si256(tr2_1, 1));
+ _mm_storeu_si128((__m128i *)(output_nextStep + 2 * 32),
+ _mm256_extractf128_si256(tr2_2, 1));
+ _mm_storeu_si128((__m128i *)(output_nextStep + 3 * 32),
+ _mm256_extractf128_si256(tr2_3, 1));
+ _mm_storeu_si128((__m128i *)(output_nextStep + 4 * 32),
+ _mm256_extractf128_si256(tr2_4, 1));
+ _mm_storeu_si128((__m128i *)(output_nextStep + 5 * 32),
+ _mm256_extractf128_si256(tr2_5, 1));
+ _mm_storeu_si128((__m128i *)(output_nextStep + 6 * 32),
+ _mm256_extractf128_si256(tr2_6, 1));
+ _mm_storeu_si128((__m128i *)(output_nextStep + 7 * 32),
+ _mm256_extractf128_si256(tr2_7, 1));
+ // Process next 8x8
+ output_currStep += 8;
+ output_nextStep += 8;
+ }
+ if (1 == pass) {
+ store_coeff(&tr2_0, curr_out + 0 * 32, next_out + 0 * 32);
+ store_coeff(&tr2_1, curr_out + 1 * 32, next_out + 1 * 32);
+ store_coeff(&tr2_2, curr_out + 2 * 32, next_out + 2 * 32);
+ store_coeff(&tr2_3, curr_out + 3 * 32, next_out + 3 * 32);
+ store_coeff(&tr2_4, curr_out + 4 * 32, next_out + 4 * 32);
+ store_coeff(&tr2_5, curr_out + 5 * 32, next_out + 5 * 32);
+ store_coeff(&tr2_6, curr_out + 6 * 32, next_out + 6 * 32);
+ store_coeff(&tr2_7, curr_out + 7 * 32, next_out + 7 * 32);
+ curr_out += 8;
+ next_out += 8;
+ }
}
}
}
diff --git a/aom_dsp/x86/fwd_txfm_avx2.c b/aom_dsp/x86/fwd_txfm_avx2.c
index d381a6e..670f864 100644
--- a/aom_dsp/x86/fwd_txfm_avx2.c
+++ b/aom_dsp/x86/fwd_txfm_avx2.c
@@ -17,14 +17,6 @@
#undef FDCT32x32_2D_AVX2
#undef FDCT32x32_HIGH_PRECISION
-// TODO(luoyi): The following macro hides an error. The second parameter type of
-// function,
-// void FDCT32x32_2D_AVX2(const int16_t *, int16_t*, int);
-// is different from the one in,
-// void aom_fdct32x32_avx2(const int16_t *, tran_low_t*, int);
-// In CONFIG_AOM_HIGHBITDEPTH=1 build, the second parameter type should be
-// int32_t.
-// This function should be removed after av1_fht32x32 scaling/rounding fix.
#define FDCT32x32_2D_AVX2 aom_fdct32x32_avx2
#define FDCT32x32_HIGH_PRECISION 1
#include "aom_dsp/x86/fwd_dct32x32_impl_avx2.h" // NOLINT
diff --git a/aom_dsp/x86/fwd_txfm_sse2.h b/aom_dsp/x86/fwd_txfm_sse2.h
index 3261584..fe3e446 100644
--- a/aom_dsp/x86/fwd_txfm_sse2.h
+++ b/aom_dsp/x86/fwd_txfm_sse2.h
@@ -12,6 +12,8 @@
#ifndef AOM_DSP_X86_FWD_TXFM_SSE2_H_
#define AOM_DSP_X86_FWD_TXFM_SSE2_H_
+#include "aom_dsp/x86/txfm_common_intrin.h"
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -257,19 +259,6 @@
#endif // CONFIG_AOM_HIGHBITDEPTH
}
-static INLINE void storeu_output(const __m128i *poutput, tran_low_t *dst_ptr) {
-#if CONFIG_AOM_HIGHBITDEPTH
- const __m128i zero = _mm_setzero_si128();
- const __m128i sign_bits = _mm_cmplt_epi16(*poutput, zero);
- __m128i out0 = _mm_unpacklo_epi16(*poutput, sign_bits);
- __m128i out1 = _mm_unpackhi_epi16(*poutput, sign_bits);
- _mm_storeu_si128((__m128i *)(dst_ptr), out0);
- _mm_storeu_si128((__m128i *)(dst_ptr + 4), out1);
-#else
- _mm_storeu_si128((__m128i *)(dst_ptr), *poutput);
-#endif // CONFIG_AOM_HIGHBITDEPTH
-}
-
static INLINE __m128i mult_round_shift(const __m128i *pin0, const __m128i *pin1,
const __m128i *pmultiplier,
const __m128i *prounding,
diff --git a/aom_dsp/x86/txfm_common_intrin.h b/aom_dsp/x86/txfm_common_intrin.h
new file mode 100644
index 0000000..890e048
--- /dev/null
+++ b/aom_dsp/x86/txfm_common_intrin.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_DSP_X86_TXFM_COMMON_INTRIN_H_
+#define AOM_DSP_X86_TXFM_COMMON_INTRIN_H_
+
+// Note: include this header below any x86 intrinsics header file.
+// storeu_output() stores the eight 16-bit lanes of *poutput at dst_ptr; when
+// CONFIG_AOM_HIGHBITDEPTH is set each lane is first sign-extended to 32 bits.
+static INLINE void storeu_output(const __m128i *poutput, tran_low_t *dst_ptr) {
+#if CONFIG_AOM_HIGHBITDEPTH
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i sign_bits = _mm_cmplt_epi16(*poutput, zero);
+ __m128i out0 = _mm_unpacklo_epi16(*poutput, sign_bits);
+ __m128i out1 = _mm_unpackhi_epi16(*poutput, sign_bits);
+ _mm_storeu_si128((__m128i *)(dst_ptr), out0);
+ _mm_storeu_si128((__m128i *)(dst_ptr + 4), out1);
+#else
+ _mm_storeu_si128((__m128i *)(dst_ptr), *poutput);
+#endif // CONFIG_AOM_HIGHBITDEPTH
+}
+
+#endif // AOM_DSP_X86_TXFM_COMMON_INTRIN_H_
diff --git a/av1/common/av1_convolve.c b/av1/common/av1_convolve.c
index 1f8d623..270ab70 100644
--- a/av1/common/av1_convolve.c
+++ b/av1/common/av1_convolve.c
@@ -1,3 +1,14 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
#include <assert.h>
#include <string.h>
diff --git a/av1/common/av1_convolve.h b/av1/common/av1_convolve.h
index 804c102..dafa032 100644
--- a/av1/common/av1_convolve.h
+++ b/av1/common/av1_convolve.h
@@ -1,3 +1,14 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
#ifndef AV1_COMMON_AV1_CONVOLVE_H_
#define AV1_COMMON_AV1_CONVOLVE_H_
#include "av1/common/filter.h"
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 8d7c7f8..65f7440 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -712,6 +712,14 @@
void av1_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y);
+// Returns the offset of tx_size from TX_4X4, as an int.
+static INLINE int tx_size_to_depth(const TX_SIZE tx_size) {
+ const int depth = (int)(tx_size - TX_4X4);
+ return depth;
+}
+
+// Returns TX_4X4 advanced by depth, cast to TX_SIZE.
+static INLINE TX_SIZE depth_to_tx_size(const int depth) {
+ const TX_SIZE tx_size = (TX_SIZE)(depth + TX_4X4);
+ return tx_size;
+}
+
static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi,
const struct macroblockd_plane *pd) {
TX_SIZE uv_txsize;
diff --git a/av1/common/common_data.h b/av1/common/common_data.h
index 9d851e2..3acb58e 100644
--- a/av1/common/common_data.h
+++ b/av1/common/common_data.h
@@ -68,18 +68,27 @@
};
static const uint8_t num_4x4_blocks_txsize_lookup[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ 1,
+#endif
1, 4, 16, 64,
#if CONFIG_EXT_TX
2, 2, 8, 8, 32, 32
#endif // CONFIG_EXT_TX
};
static const uint8_t num_4x4_blocks_wide_txsize_lookup[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ 1,
+#endif
1, 2, 4, 8,
#if CONFIG_EXT_TX
1, 2, 2, 4, 4, 8
#endif // CONFIG_EXT_TX
};
static const uint8_t num_4x4_blocks_high_txsize_lookup[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ 1,
+#endif
1, 2, 4, 8,
#if CONFIG_EXT_TX
2, 1, 4, 2, 8, 4
@@ -87,18 +96,27 @@
};
static const uint8_t num_4x4_blocks_txsize_log2_lookup[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ 0,
+#endif
0, 2, 4, 6,
#if CONFIG_EXT_TX
1, 1, 3, 3, 5, 5
#endif // CONFIG_EXT_TX
};
static const uint8_t num_4x4_blocks_wide_txsize_log2_lookup[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ 0,
+#endif
0, 1, 2, 3,
#if CONFIG_EXT_TX
0, 1, 1, 2, 2, 3
#endif // CONFIG_EXT_TX
};
static const uint8_t num_4x4_blocks_high_txsize_log2_lookup[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ 0,
+#endif
0, 1, 2, 3,
#if CONFIG_EXT_TX
1, 0, 2, 1, 3, 2
@@ -419,6 +437,9 @@
/* clang-format on */
static const TX_SIZE txsize_horz_map[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ TX_2X2, // TX_2X2
+#endif
TX_4X4, // TX_4X4
TX_8X8, // TX_8X8
TX_16X16, // TX_16X16
@@ -434,6 +455,9 @@
};
static const TX_SIZE txsize_vert_map[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ TX_2X2, // TX_2X2
+#endif
TX_4X4, // TX_4X4
TX_8X8, // TX_8X8
TX_16X16, // TX_16X16
@@ -450,6 +474,9 @@
// Transform block width in pixels
static const int tx_size_wide[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ 2,
+#endif
4, 8, 16, 32,
#if CONFIG_EXT_TX
4, 8, 8, 16, 16, 32,
@@ -458,6 +485,9 @@
// Transform block height in pixels
static const int tx_size_high[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ 2,
+#endif
4, 8, 16, 32,
#if CONFIG_EXT_TX
8, 4, 16, 8, 32, 16,
@@ -466,6 +496,9 @@
// Transform block width in unit
static const int tx_size_wide_unit[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ 1,
+#endif
1, 2, 4, 8,
#if CONFIG_EXT_TX
1, 2, 2, 4, 4, 8,
@@ -474,6 +507,9 @@
// Transform block height in unit
static const int tx_size_high_unit[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ 1,
+#endif
1, 2, 4, 8,
#if CONFIG_EXT_TX
2, 1, 4, 2, 8, 4,
@@ -482,6 +518,9 @@
// Transform block width in log2
static const int tx_size_wide_log2[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ 2,
+#endif
2, 3, 4, 5,
#if CONFIG_EXT_TX
2, 3, 3, 4, 4, 5,
@@ -490,6 +529,9 @@
// Transform block height in log2
static const int tx_size_high_log2[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ 2,
+#endif
2, 3, 4, 5,
#if CONFIG_EXT_TX
3, 2, 4, 3, 5, 4,
@@ -497,6 +539,9 @@
};
static const int tx_size_2d[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ 4,
+#endif
16, 64, 256, 1024,
#if CONFIG_EXT_TX
32, 32, 128, 128, 512, 512,
@@ -509,6 +554,9 @@
static const int tx_size_1d_in_unit_log2[TX_SIZES] = { 0, 1, 2, 3 };
static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ BLOCK_4X4, // TX_2X2
+#endif
BLOCK_4X4, // TX_4X4
BLOCK_8X8, // TX_8X8
BLOCK_16X16, // TX_16X16
@@ -524,6 +572,9 @@
};
static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ TX_2X2, // TX_2X2
+#endif
TX_4X4, // TX_4X4
TX_8X8, // TX_8X8
TX_16X16, // TX_16X16
@@ -539,6 +590,9 @@
};
static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+ TX_2X2, // TX_2X2
+#endif
TX_4X4, // TX_4X4
TX_8X8, // TX_8X8
TX_16X16, // TX_16X16
@@ -589,7 +643,10 @@
// ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1
// ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1
{
- // BLOCK_4X4
+// BLOCK_4X4
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
@@ -604,7 +661,10 @@
#endif // CONFIG_EXT_TX
},
{
- // BLOCK_4X8
+// BLOCK_4X8
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
@@ -623,7 +683,10 @@
#endif // CONFIG_EXT_TX
},
{
- // BLOCK_8X4
+// BLOCK_8X4
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
@@ -642,7 +705,10 @@
#endif // CONFIG_EXT_TX
},
{
- // BLOCK_8X8
+// BLOCK_8X8
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
@@ -657,7 +723,10 @@
#endif // CONFIG_EXT_TX
},
{
- // BLOCK_8X16
+// BLOCK_8X16
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
@@ -676,7 +745,10 @@
#endif // CONFIG_EXT_TX
},
{
- // BLOCK_16X8
+// BLOCK_16X8
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
{ { TX_8X8, TX_4X4 }, { TX_8X8, TX_8X8 } },
@@ -695,7 +767,10 @@
#endif // CONFIG_EXT_TX
},
{
- // BLOCK_16X16
+// BLOCK_16X16
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
{ { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } },
@@ -710,7 +785,10 @@
#endif // CONFIG_EXT_TX
},
{
- // BLOCK_16X32
+// BLOCK_16X32
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
{ { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
@@ -729,7 +807,10 @@
#endif // CONFIG_EXT_TX
},
{
- // BLOCK_32X16
+// BLOCK_32X16
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
{ { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
@@ -748,7 +829,10 @@
#endif // CONFIG_EXT_TX
},
{
- // BLOCK_32X32
+// BLOCK_32X32
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
{ { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
@@ -763,7 +847,10 @@
#endif // CONFIG_EXT_TX
},
{
- // BLOCK_32X64
+// BLOCK_32X64
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
{ { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
@@ -778,7 +865,10 @@
#endif // CONFIG_EXT_TX
},
{
- // BLOCK_64X32
+// BLOCK_64X32
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
{ { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
@@ -793,7 +883,10 @@
#endif // CONFIG_EXT_TX
},
{
- // BLOCK_64X64
+// BLOCK_64X64
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
{ { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
@@ -801,7 +894,10 @@
#if CONFIG_EXT_PARTITION
},
{
- // BLOCK_64X128
+// BLOCK_64X128
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
{ { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
@@ -816,7 +912,10 @@
#endif // CONFIG_EXT_TX
},
{
- // BLOCK_128X64
+// BLOCK_128X64
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
{ { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
@@ -831,7 +930,10 @@
#endif // CONFIG_EXT_TX
},
{
- // BLOCK_128X128
+// BLOCK_128X128
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
{ { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
@@ -891,8 +993,11 @@
#if CONFIG_SUPERTX
static const TX_SIZE uvsupertx_size_lookup[TX_SIZES][2][2] = {
- // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1
- // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1
+// ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1
+// ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1
+#if CONFIG_CB4X4
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } },
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
index 870632d..049c6ef 100644
--- a/av1/common/entropy.c
+++ b/av1/common/entropy.c
@@ -58,6 +58,9 @@
#endif
const uint16_t band_count_table[TX_SIZES_ALL][8] = {
+#if CONFIG_CB4X4
+ { 1, 2, 2, 3, 0, 0, 0 },
+#endif
{ 1, 2, 3, 4, 3, 16 - 13, 0 }, { 1, 2, 3, 4, 11, 64 - 21, 0 },
{ 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 1024 - 21, 0 },
#if CONFIG_EXT_TX
@@ -68,6 +71,9 @@
};
const uint16_t band_cum_count_table[TX_SIZES_ALL][8] = {
+#if CONFIG_CB4X4
+ { 0, 1, 3, 6, 10, 13, 16, 0 },
+#endif
{ 0, 1, 3, 6, 10, 13, 16, 0 }, { 0, 1, 3, 6, 10, 21, 64, 0 },
{ 0, 1, 3, 6, 10, 21, 256, 0 }, { 0, 1, 3, 6, 10, 21, 1024, 0 },
#if CONFIG_EXT_TX
@@ -2833,6 +2839,9 @@
ROUND_POWER_OF_TWO(cm->base_qindex, 8 - QCTX_BIN_BITS), QCTX_BINS - 1);
av1_copy(cm->fc->coef_probs, default_qctx_coef_probs[index]);
#else
+#if CONFIG_CB4X4
+ av1_copy(cm->fc->coef_probs[TX_2X2], default_coef_probs_4x4);
+#endif
av1_copy(cm->fc->coef_probs[TX_4X4], default_coef_probs_4x4);
av1_copy(cm->fc->coef_probs[TX_8X8], default_coef_probs_8x8);
av1_copy(cm->fc->coef_probs[TX_16X16], default_coef_probs_16x16);
@@ -2913,7 +2922,7 @@
count_sat = COEF_COUNT_SAT;
}
#endif // CONFIG_ENTROPY
- for (tx_size = TX_4X4; tx_size <= TX_32X32; tx_size++)
+ for (tx_size = 0; tx_size < TX_SIZES; tx_size++)
adapt_coef_probs(cm, tx_size, count_sat, update_factor);
#if CONFIG_RANS
av1_coef_pareto_cdfs(cm->fc);
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index e25dcf8..52dc8f1 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -567,117 +567,117 @@
[PALETTE_COLORS - 1] = {
{
// 2 colors
- { 230, 255, 128, 128, 128, 128, 128 },
- { 214, 255, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128 },
- { 240, 255, 128, 128, 128, 128, 128 },
- { 73, 255, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128 },
- { 130, 255, 128, 128, 128, 128, 128 },
- { 227, 255, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128 },
- { 188, 255, 128, 128, 128, 128, 128 },
- { 75, 255, 128, 128, 128, 128, 128 },
- { 250, 255, 128, 128, 128, 128, 128 },
- { 223, 255, 128, 128, 128, 128, 128 },
- { 252, 255, 128, 128, 128, 128, 128 },
+ { 230, 0, 0, 0, 0, 0, 0 },
+ { 214, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 0, 0, 0 },
+ { 240, 0, 0, 0, 0, 0, 0 },
+ { 73, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 0, 0, 0 },
+ { 130, 0, 0, 0, 0, 0, 0 },
+ { 227, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 0, 0, 0 },
+ { 188, 0, 0, 0, 0, 0, 0 },
+ { 75, 0, 0, 0, 0, 0, 0 },
+ { 250, 0, 0, 0, 0, 0, 0 },
+ { 223, 0, 0, 0, 0, 0, 0 },
+ { 252, 0, 0, 0, 0, 0, 0 },
},
{
// 3 colors
- { 229, 137, 255, 128, 128, 128, 128 },
- { 197, 120, 255, 128, 128, 128, 128 },
- { 107, 195, 255, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128 },
- { 27, 151, 255, 128, 128, 128, 128 },
- { 230, 130, 255, 128, 128, 128, 128 },
- { 37, 230, 255, 128, 128, 128, 128 },
- { 67, 221, 255, 128, 128, 128, 128 },
- { 124, 230, 255, 128, 128, 128, 128 },
- { 195, 109, 255, 128, 128, 128, 128 },
- { 99, 122, 255, 128, 128, 128, 128 },
- { 205, 208, 255, 128, 128, 128, 128 },
- { 40, 235, 255, 128, 128, 128, 128 },
- { 251, 132, 255, 128, 128, 128, 128 },
- { 237, 186, 255, 128, 128, 128, 128 },
- { 253, 112, 255, 128, 128, 128, 128 },
+ { 229, 137, 0, 0, 0, 0, 0 },
+ { 197, 120, 0, 0, 0, 0, 0 },
+ { 107, 195, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 0, 0, 0 },
+ { 27, 151, 0, 0, 0, 0, 0 },
+ { 230, 130, 0, 0, 0, 0, 0 },
+ { 37, 230, 0, 0, 0, 0, 0 },
+ { 67, 221, 0, 0, 0, 0, 0 },
+ { 124, 230, 0, 0, 0, 0, 0 },
+ { 195, 109, 0, 0, 0, 0, 0 },
+ { 99, 122, 0, 0, 0, 0, 0 },
+ { 205, 208, 0, 0, 0, 0, 0 },
+ { 40, 235, 0, 0, 0, 0, 0 },
+ { 251, 132, 0, 0, 0, 0, 0 },
+ { 237, 186, 0, 0, 0, 0, 0 },
+ { 253, 112, 0, 0, 0, 0, 0 },
},
{
// 4 colors
- { 195, 87, 128, 255, 128, 128, 128 },
- { 143, 100, 123, 255, 128, 128, 128 },
- { 94, 124, 119, 255, 128, 128, 128 },
- { 77, 91, 130, 255, 128, 128, 128 },
- { 39, 114, 178, 255, 128, 128, 128 },
- { 222, 94, 125, 255, 128, 128, 128 },
- { 44, 203, 132, 255, 128, 128, 128 },
- { 68, 175, 122, 255, 128, 128, 128 },
- { 110, 187, 124, 255, 128, 128, 128 },
- { 152, 91, 128, 255, 128, 128, 128 },
- { 70, 109, 181, 255, 128, 128, 128 },
- { 133, 113, 164, 255, 128, 128, 128 },
- { 47, 205, 133, 255, 128, 128, 128 },
- { 247, 94, 136, 255, 128, 128, 128 },
- { 205, 122, 146, 255, 128, 128, 128 },
- { 251, 100, 141, 255, 128, 128, 128 },
+ { 195, 87, 128, 0, 0, 0, 0 },
+ { 143, 100, 123, 0, 0, 0, 0 },
+ { 94, 124, 119, 0, 0, 0, 0 },
+ { 77, 91, 130, 0, 0, 0, 0 },
+ { 39, 114, 178, 0, 0, 0, 0 },
+ { 222, 94, 125, 0, 0, 0, 0 },
+ { 44, 203, 132, 0, 0, 0, 0 },
+ { 68, 175, 122, 0, 0, 0, 0 },
+ { 110, 187, 124, 0, 0, 0, 0 },
+ { 152, 91, 128, 0, 0, 0, 0 },
+ { 70, 109, 181, 0, 0, 0, 0 },
+ { 133, 113, 164, 0, 0, 0, 0 },
+ { 47, 205, 133, 0, 0, 0, 0 },
+ { 247, 94, 136, 0, 0, 0, 0 },
+ { 205, 122, 146, 0, 0, 0, 0 },
+ { 251, 100, 141, 0, 0, 0, 0 },
},
{
// 5 colors
- { 195, 65, 84, 125, 255, 128, 128 },
- { 150, 76, 84, 121, 255, 128, 128 },
- { 94, 110, 81, 117, 255, 128, 128 },
- { 79, 85, 91, 139, 255, 128, 128 },
- { 26, 102, 139, 127, 255, 128, 128 },
- { 220, 73, 91, 119, 255, 128, 128 },
- { 38, 203, 86, 127, 255, 128, 128 },
- { 61, 186, 72, 124, 255, 128, 128 },
- { 132, 199, 84, 128, 255, 128, 128 },
- { 172, 52, 62, 120, 255, 128, 128 },
- { 102, 89, 121, 122, 255, 128, 128 },
- { 182, 48, 69, 186, 255, 128, 128 },
- { 36, 206, 87, 126, 255, 128, 128 },
- { 249, 55, 67, 122, 255, 128, 128 },
- { 218, 88, 75, 122, 255, 128, 128 },
- { 253, 64, 80, 119, 255, 128, 128 },
+ { 195, 65, 84, 125, 0, 0, 0 },
+ { 150, 76, 84, 121, 0, 0, 0 },
+ { 94, 110, 81, 117, 0, 0, 0 },
+ { 79, 85, 91, 139, 0, 0, 0 },
+ { 26, 102, 139, 127, 0, 0, 0 },
+ { 220, 73, 91, 119, 0, 0, 0 },
+ { 38, 203, 86, 127, 0, 0, 0 },
+ { 61, 186, 72, 124, 0, 0, 0 },
+ { 132, 199, 84, 128, 0, 0, 0 },
+ { 172, 52, 62, 120, 0, 0, 0 },
+ { 102, 89, 121, 122, 0, 0, 0 },
+ { 182, 48, 69, 186, 0, 0, 0 },
+ { 36, 206, 87, 126, 0, 0, 0 },
+ { 249, 55, 67, 122, 0, 0, 0 },
+ { 218, 88, 75, 122, 0, 0, 0 },
+ { 253, 64, 80, 119, 0, 0, 0 },
},
{
// 6 colors
- { 182, 54, 64, 75, 118, 255, 128 },
- { 126, 67, 70, 76, 116, 255, 128 },
- { 79, 92, 67, 85, 120, 255, 128 },
- { 63, 61, 81, 118, 132, 255, 128 },
- { 21, 80, 105, 83, 119, 255, 128 },
- { 215, 72, 74, 74, 111, 255, 128 },
- { 50, 176, 63, 79, 120, 255, 128 },
- { 72, 148, 66, 77, 120, 255, 128 },
- { 105, 177, 57, 78, 130, 255, 128 },
- { 150, 66, 66, 80, 127, 255, 128 },
- { 81, 76, 109, 85, 116, 255, 128 },
- { 113, 81, 62, 96, 148, 255, 128 },
- { 54, 179, 69, 82, 121, 255, 128 },
- { 244, 47, 48, 67, 118, 255, 128 },
- { 198, 83, 53, 65, 121, 255, 128 },
- { 250, 42, 51, 69, 110, 255, 128 },
+ { 182, 54, 64, 75, 118, 0, 0 },
+ { 126, 67, 70, 76, 116, 0, 0 },
+ { 79, 92, 67, 85, 120, 0, 0 },
+ { 63, 61, 81, 118, 132, 0, 0 },
+ { 21, 80, 105, 83, 119, 0, 0 },
+ { 215, 72, 74, 74, 111, 0, 0 },
+ { 50, 176, 63, 79, 120, 0, 0 },
+ { 72, 148, 66, 77, 120, 0, 0 },
+ { 105, 177, 57, 78, 130, 0, 0 },
+ { 150, 66, 66, 80, 127, 0, 0 },
+ { 81, 76, 109, 85, 116, 0, 0 },
+ { 113, 81, 62, 96, 148, 0, 0 },
+ { 54, 179, 69, 82, 121, 0, 0 },
+ { 244, 47, 48, 67, 118, 0, 0 },
+ { 198, 83, 53, 65, 121, 0, 0 },
+ { 250, 42, 51, 69, 110, 0, 0 },
},
{
// 7 colors
- { 182, 45, 54, 62, 74, 113, 255 },
- { 124, 63, 57, 62, 77, 114, 255 },
- { 77, 80, 56, 66, 76, 117, 255 },
- { 63, 57, 69, 98, 85, 131, 255 },
- { 19, 81, 98, 63, 80, 116, 255 },
- { 215, 56, 60, 63, 68, 105, 255 },
- { 50, 174, 50, 60, 79, 118, 255 },
- { 68, 151, 50, 58, 73, 117, 255 },
- { 104, 182, 53, 57, 79, 127, 255 },
- { 156, 50, 51, 63, 77, 111, 255 },
- { 88, 67, 97, 59, 82, 120, 255 },
- { 114, 81, 46, 65, 103, 132, 255 },
- { 55, 166, 57, 66, 82, 120, 255 },
- { 245, 34, 38, 43, 63, 114, 255 },
- { 203, 68, 45, 47, 60, 118, 255 },
- { 250, 35, 37, 47, 66, 110, 255 },
+ { 182, 45, 54, 62, 74, 113, 0 },
+ { 124, 63, 57, 62, 77, 114, 0 },
+ { 77, 80, 56, 66, 76, 117, 0 },
+ { 63, 57, 69, 98, 85, 131, 0 },
+ { 19, 81, 98, 63, 80, 116, 0 },
+ { 215, 56, 60, 63, 68, 105, 0 },
+ { 50, 174, 50, 60, 79, 118, 0 },
+ { 68, 151, 50, 58, 73, 117, 0 },
+ { 104, 182, 53, 57, 79, 127, 0 },
+ { 156, 50, 51, 63, 77, 111, 0 },
+ { 88, 67, 97, 59, 82, 120, 0 },
+ { 114, 81, 46, 65, 103, 132, 0 },
+ { 55, 166, 57, 66, 82, 120, 0 },
+ { 245, 34, 38, 43, 63, 114, 0 },
+ { 203, 68, 45, 47, 60, 118, 0 },
+ { 250, 35, 37, 47, 66, 110, 0 },
},
{
// 8 colors
@@ -700,141 +700,144 @@
}
};
-const aom_prob av1_default_palette_uv_color_prob
- [PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS]
- [PALETTE_COLORS - 1] = { {
- // 2 colors
- { 228, 255, 128, 128, 128, 128, 128 },
- { 195, 255, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128 },
- { 228, 255, 128, 128, 128, 128, 128 },
- { 71, 255, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128 },
- { 129, 255, 128, 128, 128, 128, 128 },
- { 206, 255, 128, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128 },
- { 136, 255, 128, 128, 128, 128, 128 },
- { 98, 255, 128, 128, 128, 128, 128 },
- { 236, 255, 128, 128, 128, 128, 128 },
- { 222, 255, 128, 128, 128, 128, 128 },
- { 249, 255, 128, 128, 128, 128, 128 },
- },
- {
- // 3 colors
- { 198, 136, 255, 128, 128, 128, 128 },
- { 178, 105, 255, 128, 128, 128, 128 },
- { 100, 206, 255, 128, 128, 128, 128 },
- { 128, 128, 128, 128, 128, 128, 128 },
- { 12, 136, 255, 128, 128, 128, 128 },
- { 219, 134, 255, 128, 128, 128, 128 },
- { 50, 198, 255, 128, 128, 128, 128 },
- { 61, 231, 255, 128, 128, 128, 128 },
- { 110, 209, 255, 128, 128, 128, 128 },
- { 173, 106, 255, 128, 128, 128, 128 },
- { 145, 166, 255, 128, 128, 128, 128 },
- { 156, 175, 255, 128, 128, 128, 128 },
- { 69, 183, 255, 128, 128, 128, 128 },
- { 241, 163, 255, 128, 128, 128, 128 },
- { 224, 160, 255, 128, 128, 128, 128 },
- { 246, 154, 255, 128, 128, 128, 128 },
- },
- {
- // 4 colors
- { 173, 88, 143, 255, 128, 128, 128 },
- { 146, 81, 127, 255, 128, 128, 128 },
- { 84, 134, 102, 255, 128, 128, 128 },
- { 69, 138, 140, 255, 128, 128, 128 },
- { 31, 103, 200, 255, 128, 128, 128 },
- { 217, 101, 139, 255, 128, 128, 128 },
- { 51, 174, 121, 255, 128, 128, 128 },
- { 64, 177, 109, 255, 128, 128, 128 },
- { 96, 179, 145, 255, 128, 128, 128 },
- { 164, 77, 114, 255, 128, 128, 128 },
- { 87, 94, 156, 255, 128, 128, 128 },
- { 105, 57, 173, 255, 128, 128, 128 },
- { 63, 158, 137, 255, 128, 128, 128 },
- { 236, 102, 156, 255, 128, 128, 128 },
- { 197, 115, 153, 255, 128, 128, 128 },
- { 245, 106, 154, 255, 128, 128, 128 },
- },
- {
- // 5 colors
- { 179, 64, 97, 129, 255, 128, 128 },
- { 137, 56, 88, 125, 255, 128, 128 },
- { 82, 107, 61, 118, 255, 128, 128 },
- { 59, 113, 86, 115, 255, 128, 128 },
- { 23, 88, 118, 130, 255, 128, 128 },
- { 213, 66, 90, 125, 255, 128, 128 },
- { 37, 181, 103, 121, 255, 128, 128 },
- { 47, 188, 61, 131, 255, 128, 128 },
- { 104, 185, 103, 144, 255, 128, 128 },
- { 163, 39, 76, 112, 255, 128, 128 },
- { 94, 74, 131, 126, 255, 128, 128 },
- { 142, 42, 103, 163, 255, 128, 128 },
- { 53, 162, 99, 149, 255, 128, 128 },
- { 239, 54, 84, 108, 255, 128, 128 },
- { 203, 84, 110, 147, 255, 128, 128 },
- { 248, 70, 105, 151, 255, 128, 128 },
- },
- {
- // 6 colors
- { 189, 50, 67, 90, 130, 255, 128 },
- { 114, 50, 55, 90, 123, 255, 128 },
- { 66, 76, 54, 82, 128, 255, 128 },
- { 43, 69, 69, 80, 129, 255, 128 },
- { 22, 59, 87, 88, 141, 255, 128 },
- { 203, 49, 68, 87, 122, 255, 128 },
- { 43, 157, 74, 104, 146, 255, 128 },
- { 54, 138, 51, 95, 138, 255, 128 },
- { 82, 171, 58, 102, 146, 255, 128 },
- { 129, 38, 59, 64, 168, 255, 128 },
- { 56, 67, 119, 92, 112, 255, 128 },
- { 96, 62, 53, 132, 82, 255, 128 },
- { 60, 147, 77, 108, 145, 255, 128 },
- { 238, 76, 73, 93, 148, 255, 128 },
- { 189, 86, 73, 103, 157, 255, 128 },
- { 246, 62, 75, 83, 167, 255, 128 },
- },
- {
- // 7 colors
- { 179, 42, 51, 73, 99, 134, 255 },
- { 119, 52, 52, 61, 64, 114, 255 },
- { 53, 77, 35, 65, 71, 131, 255 },
- { 38, 70, 51, 68, 89, 144, 255 },
- { 23, 65, 128, 73, 97, 131, 255 },
- { 210, 47, 52, 63, 81, 143, 255 },
- { 42, 159, 57, 68, 98, 143, 255 },
- { 49, 153, 45, 82, 93, 143, 255 },
- { 81, 169, 52, 72, 113, 151, 255 },
- { 136, 46, 35, 56, 75, 96, 255 },
- { 57, 84, 109, 47, 107, 131, 255 },
- { 128, 78, 57, 36, 128, 85, 255 },
- { 54, 149, 68, 77, 94, 153, 255 },
- { 243, 58, 50, 71, 81, 167, 255 },
- { 189, 92, 64, 70, 121, 173, 255 },
- { 248, 35, 38, 51, 82, 201, 255 },
- },
- {
- // 8 colors
- { 201, 40, 36, 42, 64, 92, 123 },
- { 116, 43, 33, 43, 73, 102, 128 },
- { 46, 77, 37, 69, 62, 78, 150 },
- { 40, 65, 52, 50, 76, 89, 133 },
- { 28, 48, 91, 17, 64, 77, 133 },
- { 218, 43, 43, 37, 56, 72, 163 },
- { 41, 155, 44, 83, 82, 129, 180 },
- { 44, 141, 29, 55, 64, 89, 147 },
- { 92, 166, 48, 45, 59, 126, 179 },
- { 169, 35, 49, 41, 36, 99, 139 },
- { 55, 77, 77, 56, 60, 75, 156 },
- { 155, 81, 51, 64, 57, 182, 255 },
- { 60, 134, 49, 49, 93, 128, 174 },
- { 244, 98, 51, 46, 22, 73, 238 },
- { 189, 70, 40, 87, 93, 79, 201 },
- { 248, 54, 49, 40, 29, 42, 227 },
- } };
+const aom_prob
+ av1_default_palette_uv_color_prob[PALETTE_MAX_SIZE - 1]
+ [PALETTE_COLOR_CONTEXTS]
+ [PALETTE_COLORS - 1] = {
+ {
+ // 2 colors
+ { 228, 0, 0, 0, 0, 0, 0 },
+ { 195, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 0, 0, 0 },
+ { 228, 0, 0, 0, 0, 0, 0 },
+ { 71, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 0, 0, 0 },
+ { 129, 0, 0, 0, 0, 0, 0 },
+ { 206, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 0, 0, 0 },
+ { 136, 0, 0, 0, 0, 0, 0 },
+ { 98, 0, 0, 0, 0, 0, 0 },
+ { 236, 0, 0, 0, 0, 0, 0 },
+ { 222, 0, 0, 0, 0, 0, 0 },
+ { 249, 0, 0, 0, 0, 0, 0 },
+ },
+ {
+ // 3 colors
+ { 198, 136, 0, 0, 0, 0, 0 },
+ { 178, 105, 0, 0, 0, 0, 0 },
+ { 100, 206, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 0, 0, 0 },
+ { 12, 136, 0, 0, 0, 0, 0 },
+ { 219, 134, 0, 0, 0, 0, 0 },
+ { 50, 198, 0, 0, 0, 0, 0 },
+ { 61, 231, 0, 0, 0, 0, 0 },
+ { 110, 209, 0, 0, 0, 0, 0 },
+ { 173, 106, 0, 0, 0, 0, 0 },
+ { 145, 166, 0, 0, 0, 0, 0 },
+ { 156, 175, 0, 0, 0, 0, 0 },
+ { 69, 183, 0, 0, 0, 0, 0 },
+ { 241, 163, 0, 0, 0, 0, 0 },
+ { 224, 160, 0, 0, 0, 0, 0 },
+ { 246, 154, 0, 0, 0, 0, 0 },
+ },
+ {
+ // 4 colors
+ { 173, 88, 143, 0, 0, 0, 0 },
+ { 146, 81, 127, 0, 0, 0, 0 },
+ { 84, 134, 102, 0, 0, 0, 0 },
+ { 69, 138, 140, 0, 0, 0, 0 },
+ { 31, 103, 200, 0, 0, 0, 0 },
+ { 217, 101, 139, 0, 0, 0, 0 },
+ { 51, 174, 121, 0, 0, 0, 0 },
+ { 64, 177, 109, 0, 0, 0, 0 },
+ { 96, 179, 145, 0, 0, 0, 0 },
+ { 164, 77, 114, 0, 0, 0, 0 },
+ { 87, 94, 156, 0, 0, 0, 0 },
+ { 105, 57, 173, 0, 0, 0, 0 },
+ { 63, 158, 137, 0, 0, 0, 0 },
+ { 236, 102, 156, 0, 0, 0, 0 },
+ { 197, 115, 153, 0, 0, 0, 0 },
+ { 245, 106, 154, 0, 0, 0, 0 },
+ },
+ {
+ // 5 colors
+ { 179, 64, 97, 129, 0, 0, 0 },
+ { 137, 56, 88, 125, 0, 0, 0 },
+ { 82, 107, 61, 118, 0, 0, 0 },
+ { 59, 113, 86, 115, 0, 0, 0 },
+ { 23, 88, 118, 130, 0, 0, 0 },
+ { 213, 66, 90, 125, 0, 0, 0 },
+ { 37, 181, 103, 121, 0, 0, 0 },
+ { 47, 188, 61, 131, 0, 0, 0 },
+ { 104, 185, 103, 144, 0, 0, 0 },
+ { 163, 39, 76, 112, 0, 0, 0 },
+ { 94, 74, 131, 126, 0, 0, 0 },
+ { 142, 42, 103, 163, 0, 0, 0 },
+ { 53, 162, 99, 149, 0, 0, 0 },
+ { 239, 54, 84, 108, 0, 0, 0 },
+ { 203, 84, 110, 147, 0, 0, 0 },
+ { 248, 70, 105, 151, 0, 0, 0 },
+ },
+ {
+ // 6 colors
+ { 189, 50, 67, 90, 130, 0, 0 },
+ { 114, 50, 55, 90, 123, 0, 0 },
+ { 66, 76, 54, 82, 128, 0, 0 },
+ { 43, 69, 69, 80, 129, 0, 0 },
+ { 22, 59, 87, 88, 141, 0, 0 },
+ { 203, 49, 68, 87, 122, 0, 0 },
+ { 43, 157, 74, 104, 146, 0, 0 },
+ { 54, 138, 51, 95, 138, 0, 0 },
+ { 82, 171, 58, 102, 146, 0, 0 },
+ { 129, 38, 59, 64, 168, 0, 0 },
+ { 56, 67, 119, 92, 112, 0, 0 },
+ { 96, 62, 53, 132, 82, 0, 0 },
+ { 60, 147, 77, 108, 145, 0, 0 },
+ { 238, 76, 73, 93, 148, 0, 0 },
+ { 189, 86, 73, 103, 157, 0, 0 },
+ { 246, 62, 75, 83, 167, 0, 0 },
+ },
+ {
+ // 7 colors
+ { 179, 42, 51, 73, 99, 134, 0 },
+ { 119, 52, 52, 61, 64, 114, 0 },
+ { 53, 77, 35, 65, 71, 131, 0 },
+ { 38, 70, 51, 68, 89, 144, 0 },
+ { 23, 65, 128, 73, 97, 131, 0 },
+ { 210, 47, 52, 63, 81, 143, 0 },
+ { 42, 159, 57, 68, 98, 143, 0 },
+ { 49, 153, 45, 82, 93, 143, 0 },
+ { 81, 169, 52, 72, 113, 151, 0 },
+ { 136, 46, 35, 56, 75, 96, 0 },
+ { 57, 84, 109, 47, 107, 131, 0 },
+ { 128, 78, 57, 36, 128, 85, 0 },
+ { 54, 149, 68, 77, 94, 153, 0 },
+ { 243, 58, 50, 71, 81, 167, 0 },
+ { 189, 92, 64, 70, 121, 173, 0 },
+ { 248, 35, 38, 51, 82, 201, 0 },
+ },
+ {
+ // 8 colors
+ { 201, 40, 36, 42, 64, 92, 123 },
+ { 116, 43, 33, 43, 73, 102, 128 },
+ { 46, 77, 37, 69, 62, 78, 150 },
+ { 40, 65, 52, 50, 76, 89, 133 },
+ { 28, 48, 91, 17, 64, 77, 133 },
+ { 218, 43, 43, 37, 56, 72, 163 },
+ { 41, 155, 44, 83, 82, 129, 180 },
+ { 44, 141, 29, 55, 64, 89, 147 },
+ { 92, 166, 48, 45, 59, 126, 179 },
+ { 169, 35, 49, 41, 36, 99, 139 },
+ { 55, 77, 77, 56, 60, 75, 156 },
+ { 155, 81, 51, 64, 57, 182, 255 },
+ { 60, 134, 49, 49, 93, 128, 174 },
+ { 244, 98, 51, 46, 22, 73, 238 },
+ { 189, 70, 40, 87, 93, 79, 201 },
+ { 248, 54, 49, 40, 29, 42, 227 },
+ }
+ };
static const int palette_color_context_lookup[PALETTE_COLOR_CONTEXTS] = {
// (3, 0, 0, 0), (3, 2, 0, 0), (3, 3, 2, 0), (3, 3, 2, 2),
@@ -848,23 +851,23 @@
};
#endif // CONFIG_PALETTE
-const aom_tree_index av1_tx_size_tree[TX_SIZES - 1][TREE_SIZE(TX_SIZES)] = {
+const aom_tree_index av1_tx_size_tree[MAX_TX_DEPTH][TREE_SIZE(TX_SIZES)] = {
{
// Max tx_size is 8X8
- -TX_4X4, -TX_8X8,
+ -0, -1,
},
{
// Max tx_size is 16X16
- -TX_4X4, 2, -TX_8X8, -TX_16X16,
+ -0, 2, -1, -2,
},
{
// Max tx_size is 32X32
- -TX_4X4, 2, -TX_8X8, 4, -TX_16X16, -TX_32X32,
+ -0, 2, -1, 4, -2, -3,
},
};
-static const aom_prob default_tx_size_prob[TX_SIZES - 1][TX_SIZE_CONTEXTS]
- [TX_SIZES - 1] = {
+static const aom_prob default_tx_size_prob[MAX_TX_DEPTH][TX_SIZE_CONTEXTS]
+ [MAX_TX_DEPTH] = {
{
// Max tx_size is 8X8
{ 100 },
@@ -902,30 +905,24 @@
int av1_get_palette_color_context(const uint8_t *color_map, int cols, int r,
int c, int n, uint8_t *color_order,
int *color_idx) {
- int i, j, max, max_idx, temp;
+ int i;
+ // The +10 below should not be needed. But we get a warning "array subscript
+ // is above array bounds [-Werror=array-bounds]" without it, possibly due to
+ // this (or similar) bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
int scores[PALETTE_MAX_SIZE + 10];
- int weights[4] = { 3, 2, 3, 2 };
+ const int weights[4] = { 3, 2, 3, 2 };
int color_ctx = 0;
int color_neighbors[4];
int inverse_color_order[PALETTE_MAX_SIZE];
assert(n <= PALETTE_MAX_SIZE);
- if (c - 1 >= 0)
- color_neighbors[0] = color_map[r * cols + c - 1];
- else
- color_neighbors[0] = -1;
- if (c - 1 >= 0 && r - 1 >= 0)
- color_neighbors[1] = color_map[(r - 1) * cols + c - 1];
- else
- color_neighbors[1] = -1;
- if (r - 1 >= 0)
- color_neighbors[2] = color_map[(r - 1) * cols + c];
- else
- color_neighbors[2] = -1;
- if (r - 1 >= 0 && c + 1 <= cols - 1)
- color_neighbors[3] = color_map[(r - 1) * cols + c + 1];
- else
- color_neighbors[3] = -1;
+ color_neighbors[0] = (c - 1 >= 0) ? color_map[r * cols + c - 1] : -1;
+ color_neighbors[1] =
+ (c - 1 >= 0 && r - 1 >= 0) ? color_map[(r - 1) * cols + c - 1] : -1;
+ color_neighbors[2] = (r - 1 >= 0) ? color_map[(r - 1) * cols + c] : -1;
+ color_neighbors[3] = (r - 1 >= 0 && c + 1 <= cols - 1)
+ ? color_map[(r - 1) * cols + c + 1]
+ : -1;
for (i = 0; i < PALETTE_MAX_SIZE; ++i) {
color_order[i] = i;
@@ -933,23 +930,25 @@
}
memset(scores, 0, PALETTE_MAX_SIZE * sizeof(scores[0]));
for (i = 0; i < 4; ++i) {
- if (color_neighbors[i] >= 0) scores[color_neighbors[i]] += weights[i];
+ if (color_neighbors[i] >= 0) {
+ scores[color_neighbors[i]] += weights[i];
+ }
}
+ // Get the top 4 scores (sorted from large to small).
for (i = 0; i < 4; ++i) {
- max = scores[i];
- max_idx = i;
- j = i + 1;
- while (j < n) {
+ int max = scores[i];
+ int max_idx = i;
+ int j;
+ for (j = i + 1; j < n; ++j) {
if (scores[j] > max) {
max = scores[j];
max_idx = j;
}
- ++j;
}
if (max_idx != i) {
- temp = scores[i];
+ int temp = scores[i];
scores[i] = scores[max_idx];
scores[max_idx] = temp;
@@ -1310,13 +1309,21 @@
static const aom_prob
default_intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1] = {
+#if CONFIG_CB4X4
+ { { 240, 85, 128 }, { 4, 1, 248 }, { 4, 1, 8 }, { 4, 248, 128 } },
+#endif
{ { 240, 85, 128 }, { 4, 1, 248 }, { 4, 1, 8 }, { 4, 248, 128 } },
{ { 244, 85, 128 }, { 8, 2, 248 }, { 8, 2, 8 }, { 8, 248, 128 } },
{ { 248, 85, 128 }, { 16, 4, 248 }, { 16, 4, 8 }, { 16, 248, 128 } },
};
static const aom_prob default_inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1] = {
- { 160, 85, 128 }, { 176, 85, 128 }, { 192, 85, 128 },
+#if CONFIG_CB4X4
+ { 160, 85, 128 },
+#endif
+ { 160, 85, 128 },
+ { 176, 85, 128 },
+ { 192, 85, 128 },
};
#endif // CONFIG_EXT_TX
@@ -1561,7 +1568,7 @@
#if CONFIG_VAR_TX && CONFIG_EXT_TX && CONFIG_RECT_TX
if (cm->tx_mode == TX_MODE_SELECT) {
- for (i = 0; i < TX_SIZES - 1; ++i) {
+ for (i = 0; i < MAX_TX_DEPTH; ++i) {
fc->rect_tx_prob[i] =
av1_mode_mv_merge_probs(pre_fc->rect_tx_prob[i], counts->rect_tx[i]);
}
@@ -1593,7 +1600,7 @@
const FRAME_COUNTS *counts = &cm->counts;
if (cm->tx_mode == TX_MODE_SELECT) {
- for (i = 0; i < TX_SIZES - 1; ++i) {
+ for (i = 0; i < MAX_TX_DEPTH; ++i) {
for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
aom_tree_merge_probs(av1_tx_size_tree[i], pre_fc->tx_size_probs[i][j],
counts->tx_size[i][j], fc->tx_size_probs[i][j]);
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
index 3043114..79b27da 100644
--- a/av1/common/entropymode.h
+++ b/av1/common/entropymode.h
@@ -125,7 +125,7 @@
#else
aom_prob comp_ref_prob[REF_CONTEXTS][COMP_REFS - 1];
#endif // CONFIG_EXT_REFS
- aom_prob tx_size_probs[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES - 1];
+ aom_prob tx_size_probs[MAX_TX_DEPTH][TX_SIZE_CONTEXTS][MAX_TX_DEPTH];
#if CONFIG_VAR_TX
aom_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS];
#if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -237,7 +237,7 @@
// to use forward updates for the coeff probs, and as such it does not really
// belong into this structure.
unsigned int tx_size_totals[TX_SIZES];
- unsigned int tx_size[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
+ unsigned int tx_size[MAX_TX_DEPTH][TX_SIZE_CONTEXTS][TX_SIZES];
#if CONFIG_VAR_TX
unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
#if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -324,7 +324,7 @@
extern const aom_tree_index av1_palette_color_tree[PALETTE_MAX_SIZE - 1]
[TREE_SIZE(PALETTE_COLORS)];
#endif // CONFIG_PALETTE
-extern const aom_tree_index av1_tx_size_tree[TX_SIZES - 1][TREE_SIZE(TX_SIZES)];
+extern const aom_tree_index av1_tx_size_tree[MAX_TX_DEPTH][TREE_SIZE(TX_SIZES)];
#if CONFIG_EXT_INTRA
extern const aom_tree_index av1_intra_filter_tree[TREE_SIZE(INTRA_FILTERS)];
#endif // CONFIG_EXT_INTRA
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 0a1f7a3..a684eed 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -131,6 +131,9 @@
// block transform size
typedef enum ATTRIBUTE_PACKED {
+#if CONFIG_CB4X4
+ TX_2X2, // 2x2 transform
+#endif
TX_4X4, // 4x4 transform
TX_8X8, // 8x8 transform
TX_16X16, // 16x16 transform
@@ -148,6 +151,8 @@
TX_INVALID = 255 // Invalid transform size
} TX_SIZE;
+#define MAX_TX_DEPTH (TX_32X32 - TX_4X4)
+
#define MAX_TX_SIZE_LOG2 5
#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2)
#define MIN_TX_SIZE_LOG2 2
@@ -206,8 +211,12 @@
#define EXT_TX_SETS_INTER 4 // Sets of transform selections for INTER
#define EXT_TX_SETS_INTRA 3 // Sets of transform selections for INTRA
#else
+#if CONFIG_CB4X4
+#define EXT_TX_SIZES 4 // number of sizes that use extended transforms
+#else
#define EXT_TX_SIZES 3 // number of sizes that use extended transforms
-#endif // CONFIG_EXT_TX
+#endif
+#endif // CONFIG_EXT_TX
typedef enum {
AOM_LAST_FLAG = 1 << 0,
diff --git a/av1/common/loopfilter.c b/av1/common/loopfilter.c
index dc7ee18..dec5514 100644
--- a/av1/common/loopfilter.c
+++ b/av1/common/loopfilter.c
@@ -40,6 +40,9 @@
//
// A loopfilter should be applied to every other 8x8 horizontally.
static const uint64_t left_64x64_txform_mask[TX_SIZES] = {
+#if CONFIG_CB4X4
+ 0xffffffffffffffffULL, // TX_2X2
+#endif
0xffffffffffffffffULL, // TX_4X4
0xffffffffffffffffULL, // TX_8x8
0x5555555555555555ULL, // TX_16x16
@@ -64,6 +67,9 @@
//
// A loopfilter should be applied to every other 4 the row vertically.
static const uint64_t above_64x64_txform_mask[TX_SIZES] = {
+#if CONFIG_CB4X4
+ 0xffffffffffffffffULL, // TX_2X2
+#endif
0xffffffffffffffffULL, // TX_4X4
0xffffffffffffffffULL, // TX_8x8
0x00ff00ff00ff00ffULL, // TX_16x16
@@ -142,6 +148,9 @@
// 16 bit masks for uv transform sizes.
static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = {
+#if CONFIG_CB4X4
+ 0xffff, // TX_2X2
+#endif
0xffff, // TX_4X4
0xffff, // TX_8x8
0x5555, // TX_16x16
@@ -149,6 +158,9 @@
};
static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = {
+#if CONFIG_CB4X4
+ 0xffff, // TX_2X2
+#endif
0xffff, // TX_4X4
0xffff, // TX_8x8
0x0f0f, // TX_16x16
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index be1cbc1..f9b51aa 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -733,7 +733,8 @@
TX_SIZE tx_size) {
int above = *above_ctx < tx_size;
int left = *left_ctx < tx_size;
- return (tx_size - 1) * 3 + above + left;
+
+ return (tx_size - TX_8X8) * 3 + above + left;
}
#endif
diff --git a/av1/common/pred_common.h b/av1/common/pred_common.h
index b3ef1c4..b906749 100644
--- a/av1/common/pred_common.h
+++ b/av1/common/pred_common.h
@@ -186,8 +186,13 @@
if (!has_left) left_ctx = above_ctx;
if (!has_above) above_ctx = left_ctx;
-
+#if CONFIG_CB4X4
+ // TODO(jingning): Temporary setup. Will rework this after the cb4x4
+ // framework is up and running.
+ return (above_ctx + left_ctx) > max_tx_size + 1;
+#else
return (above_ctx + left_ctx) > max_tx_size;
+#endif
}
#if CONFIG_VAR_TX
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 3eec384..5082d7b 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -418,7 +418,6 @@
const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
const struct scale_factors *sf);
-#if CONFIG_DUAL_FILTER
// Detect if the block have sub-pixel level motion vectors
// per component.
static INLINE int has_subpel_mv_component(const MODE_INFO *const mi,
@@ -460,60 +459,22 @@
return 0;
}
-#endif
-#if CONFIG_EXT_INTERP
static INLINE int av1_is_interp_needed(const MACROBLOCKD *const xd) {
MODE_INFO *const mi = xd->mi[0];
- MB_MODE_INFO *const mbmi = &mi->mbmi;
- const BLOCK_SIZE bsize = mbmi->sb_type;
- const int is_compound = has_second_ref(mbmi);
- int intpel_mv = 1;
- int plane;
-
-#if SUPPORT_NONINTERPOLATING_FILTERS
- // TODO(debargha): This is is currently only for experimentation
- // with non-interpolating filters. Remove later.
- // If any of the filters are non-interpolating, then indicate the
- // interpolation filter always.
- int i;
- for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
- if (!IsInterpolatingFilter(i)) return 1;
- }
-#endif
-
- // For scaled references, interpolation filter is indicated all the time.
- if (av1_is_scaled(&xd->block_refs[0]->sf)) return 1;
- if (is_compound && av1_is_scaled(&xd->block_refs[1]->sf)) return 1;
-
- if (bsize < BLOCK_8X8) {
- for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
- const PARTITION_TYPE bp = BLOCK_8X8 - bsize;
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const int have_vsplit = bp != PARTITION_HORZ;
- const int have_hsplit = bp != PARTITION_VERT;
- const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
- const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
- int ref;
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- int x, y;
- for (y = 0; y < num_4x4_h; ++y)
- for (x = 0; x < num_4x4_w; ++x) {
- const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x);
- if (mv_has_subpel(&mv)) return 1;
- }
+ const int is_compound = has_second_ref(&mi->mbmi);
+ int ref;
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ int row_col;
+ for (row_col = 0; row_col < 2; ++row_col) {
+ const int dir = (ref << 1) + row_col;
+ if (has_subpel_mv_component(mi, xd, dir)) {
+ return 1;
}
}
- return 0;
- } else {
- intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
- if (is_compound && intpel_mv) {
- intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
- }
}
- return !intpel_mv;
+ return 0;
}
-#endif // CONFIG_EXT_INTERP
#if CONFIG_MOTION_VAR
const uint8_t *av1_get_obmc_mask(int length);
diff --git a/av1/common/scan.c b/av1/common/scan.c
index 1281843..693386f 100644
--- a/av1/common/scan.c
+++ b/av1/common/scan.c
@@ -3801,6 +3801,9 @@
#endif // CONFIG_EXT_TX
const SCAN_ORDER av1_default_scan_orders[TX_SIZES] = {
+#if CONFIG_CB4X4
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+#endif
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
{ default_scan_16x16, av1_default_iscan_16x16, default_scan_16x16_neighbors },
@@ -3809,6 +3812,27 @@
#if CONFIG_EXT_TX
const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
+#if CONFIG_CB4X4
+ {
+ // TX_2X2
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
+ { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+ { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
+ { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
+ { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
+ { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
+ { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
+ { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
+ },
+#endif
{
// TX_4X4
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
@@ -3896,6 +3920,27 @@
};
const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
+#if CONFIG_CB4X4
+ {
+ // TX_2X2
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+ { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+ { mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
+ { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+ { mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
+ { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+ { mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
+ },
+#endif
{
// TX_4X4
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
@@ -4134,9 +4179,16 @@
}
};
-#else // CONFIG_EXT_TX
+#else // CONFIG_EXT_TX
const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
+#if CONFIG_CB4X4
+ { // TX_2X2
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+ { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
+ { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
+ { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors } },
+#endif
{ // TX_4X4
{ default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
{ row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 8ab549b..b3617d1 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -3661,7 +3661,7 @@
#endif
if (cm->tx_mode == TX_MODE_SELECT) {
- for (i = 0; i < TX_SIZES - 1; ++i)
+ for (i = 0; i < MAX_TX_DEPTH; ++i)
for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
for (k = 0; k < i + 1; ++k)
av1_diff_update_prob(&r, &fc->tx_size_probs[i][j][k], ACCT_STR);
@@ -3674,7 +3674,7 @@
av1_diff_update_prob(&r, &fc->txfm_partition_prob[k], ACCT_STR);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
if (cm->tx_mode == TX_MODE_SELECT) {
- for (i = 1; i < TX_SIZES - 1; ++i)
+ for (i = 1; i < MAX_TX_DEPTH; ++i)
av1_diff_update_prob(&r, &fc->rect_tx_prob[i], ACCT_STR);
}
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 3993e72..31183c0 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -350,11 +350,11 @@
int tx_size_cat, aom_reader *r) {
FRAME_COUNTS *counts = xd->counts;
const int ctx = get_tx_size_context(xd);
- int tx_size =
- aom_read_tree(r, av1_tx_size_tree[tx_size_cat],
- cm->fc->tx_size_probs[tx_size_cat][ctx], ACCT_STR);
- if (counts) ++counts->tx_size[tx_size_cat][ctx][tx_size];
- return (TX_SIZE)tx_size;
+ int depth = aom_read_tree(r, av1_tx_size_tree[tx_size_cat],
+ cm->fc->tx_size_probs[tx_size_cat][ctx], ACCT_STR);
+ TX_SIZE tx_size = depth_to_tx_size(depth);
+ if (counts) ++counts->tx_size[tx_size_cat][ctx][depth];
+ return tx_size;
}
static TX_SIZE read_tx_size_intra(AV1_COMMON *cm, MACROBLOCKD *xd,
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 377af50..899aa81 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -74,7 +74,7 @@
static struct av1_token palette_color_encodings[PALETTE_MAX_SIZE - 1]
[PALETTE_MAX_SIZE];
#endif // CONFIG_PALETTE
-static const struct av1_token tx_size_encodings[TX_SIZES - 1][TX_SIZES] = {
+static const struct av1_token tx_size_encodings[MAX_TX_DEPTH][TX_SIZES] = {
{ { 0, 1 }, { 1, 1 } }, // Max tx_size is 8X8
{ { 0, 1 }, { 2, 2 }, { 3, 2 } }, // Max tx_size is 16X16
{ { 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 } }, // Max tx_size is 32X32
@@ -414,6 +414,7 @@
const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
: intra_tx_size_cat_lookup[bsize];
const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
+ const int depth = tx_size_to_depth(coded_tx_size);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(xd, mbmi)));
@@ -423,7 +424,7 @@
av1_write_token(w, av1_tx_size_tree[tx_size_cat],
cm->fc->tx_size_probs[tx_size_cat][tx_size_ctx],
- &tx_size_encodings[tx_size_cat][coded_tx_size]);
+ &tx_size_encodings[tx_size_cat][depth]);
}
}
@@ -2873,7 +2874,7 @@
FRAME_COUNTS *counts) {
if (cm->tx_mode == TX_MODE_SELECT) {
int i, j;
- for (i = 0; i < TX_SIZES - 1; ++i)
+ for (i = 0; i < MAX_TX_DEPTH; ++i)
for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
prob_diff_update(av1_tx_size_tree[i], cm->fc->tx_size_probs[i][j],
counts->tx_size[i][j], i + 2, w);
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 3733efc..c94c1d8 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4442,8 +4442,10 @@
av1_zero_above_context(cm, tile_info->mi_col_start, tile_info->mi_col_end);
// Set up pointers to per thread motion search counters.
- td->mb.m_search_count_ptr = &td->rd_counts.m_search_count;
- td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count;
+ this_tile->m_search_count = 0; // Count of motion search hits.
+ this_tile->ex_search_count = 0; // Exhaustive mesh search hits.
+ td->mb.m_search_count_ptr = &this_tile->m_search_count;
+ td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
mi_row += cm->mib_size) {
@@ -4484,10 +4486,35 @@
#define MIN_TRANS_THRESH 8
#define GLOBAL_MOTION_ADVANTAGE_THRESH 0.60
#define GLOBAL_MOTION_MODEL ROTZOOM
-// TODO(sarahparker) This function needs to be adjusted
-// to accomodate changes in the paraemter integerization.
-// Commenting it out until the fix is made.
-/*
+
+// Adds some offset to a global motion parameter and handles
+// all of the necessary precision shifts, clamping, and
+// zero-centering.
+static int16_t add_param_offset(int param_index, int16_t param_value,
+ int16_t offset) {
+ int scale_vals[2] = { GM_ALPHA_PREC_DIFF, GM_TRANS_PREC_DIFF };
+ int clamp_vals[2] = { GM_ALPHA_MAX, GM_TRANS_MAX };
+ int is_trans_param = param_index < 2;
+ int is_one_centered = (!is_trans_param) && (param_index & 1);
+
+ // Make parameter zero-centered and undo the shift that was done to make
+ // it compatible with the warped model
+ param_value = (param_value - (is_one_centered << WARPEDMODEL_PREC_BITS)) >>
+ scale_vals[is_trans_param];
+ // Add desired offset to the rescaled/zero-centered parameter
+ param_value += offset;
+ // Clamp the parameter so it does not overflow the number of bits allotted
+ // to it in the bitstream
+ param_value = (int16_t)clamp(param_value, -clamp_vals[is_trans_param],
+ clamp_vals[is_trans_param]);
+ // Rescale the parameter to WARPEDMODEL_PREC_BITS so it is compatible
+ // with the warped motion library
+ param_value *= (1 << scale_vals[is_trans_param]);
+
+ // Undo the zero-centering step if necessary
+ return param_value + (is_one_centered << WARPEDMODEL_PREC_BITS);
+}
+
static void refine_integerized_param(WarpedMotionParams *wm,
#if CONFIG_AOM_HIGHBITDEPTH
int use_hbd, int bd,
@@ -4500,7 +4527,7 @@
int n_params = n_trans_model_params[wm->wmtype];
int16_t *param_mat = (int16_t *)wm->wmmat;
double step_error;
- int step;
+ int16_t step;
int16_t *param;
int16_t curr_param;
int16_t best_param;
@@ -4519,9 +4546,7 @@
best_param = curr_param;
for (i = 0; i < n_refinements; i++) {
// look to the left
- *param =
- (int16_t)clamp(curr_param - step, p < 2 ? GM_TRANS_MIN : GM_ALPHA_MIN,
- p < 2 ? GM_TRANS_MAX : GM_ALPHA_MAX);
+ *param = add_param_offset(p, curr_param, -step);
step_error =
av1_warp_erroradv(wm,
#if CONFIG_AOM_HIGHBITDEPTH
@@ -4538,9 +4563,7 @@
}
// look to the right
- *param =
- (int16_t)clamp(curr_param + step, p < 2 ? GM_TRANS_MIN : GM_ALPHA_MIN,
- p < 2 ? GM_TRANS_MAX : GM_ALPHA_MAX);
+ *param = add_param_offset(p, curr_param, step);
step_error =
av1_warp_erroradv(wm,
#if CONFIG_AOM_HIGHBITDEPTH
@@ -4564,7 +4587,6 @@
*param = best_param;
}
}
-*/
static void convert_to_params(const double *params, TransformationType type,
int16_t *model) {
@@ -4624,8 +4646,6 @@
av1_zero(*td->counts);
av1_zero(rdc->coef_counts);
av1_zero(rdc->comp_pred_diff);
- rdc->m_search_count = 0; // Count of motion search hits.
- rdc->ex_search_count = 0; // Exhaustive mesh search hits.
#if CONFIG_GLOBAL_MOTION
aom_clear_system_state();
@@ -4643,6 +4663,14 @@
convert_model_to_params(params, GLOBAL_MOTION_MODEL,
&cm->global_motion[frame]);
if (get_gmtype(&cm->global_motion[frame]) > GLOBAL_ZERO) {
+ refine_integerized_param(
+ &cm->global_motion[frame].motion_params,
+#if CONFIG_AOM_HIGHBITDEPTH
+ xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
+#endif // CONFIG_AOM_HIGHBITDEPTH
+ ref_buf->y_buffer, ref_buf->y_width, ref_buf->y_height,
+ ref_buf->y_stride, cpi->Source->y_buffer, cpi->Source->y_width,
+ cpi->Source->y_height, cpi->Source->y_stride, 3);
// compute the advantage of using gm parameters over 0 motion
erroradvantage = av1_warp_erroradv(
&cm->global_motion[frame].motion_params,
@@ -5206,6 +5234,7 @@
const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
: intra_tx_size_cat_lookup[bsize];
const TX_SIZE coded_tx_size = txsize_sqr_up_map[mbmi->tx_size];
+ const int depth = tx_size_to_depth(coded_tx_size);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(xd, mbmi)));
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -5219,7 +5248,7 @@
if (is_inter) {
tx_partition_count_update(cm, x, bsize, mi_row, mi_col, td->counts);
} else {
- ++td->counts->tx_size[tx_size_cat][tx_size_ctx][coded_tx_size];
+ ++td->counts->tx_size[tx_size_cat][tx_size_ctx][depth];
if (mbmi->tx_size != max_txsize_lookup[bsize]) ++x->txb_split_count;
}
#if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -5227,7 +5256,7 @@
#endif
#endif
#if !CONFIG_VAR_TX
- ++td->counts->tx_size[tx_size_cat][tx_size_ctx][coded_tx_size];
+ ++td->counts->tx_size[tx_size_cat][tx_size_ctx][depth];
#endif
} else {
int i, j;
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 6b7e72c..c5459dc 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -81,7 +81,7 @@
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
const int eob = p->eobs[block];
const PLANE_TYPE plane_type = pd->plane_type;
- const int default_eob = get_tx2d_size(tx_size);
+ const int default_eob = tx_size_2d[tx_size];
const int16_t *const dequant_ptr = pd->dequant;
const uint8_t *const band_translate = get_band_translate(tx_size);
TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
@@ -826,12 +826,9 @@
}
#if CONFIG_VAR_TX
- for (i = 0; i < num_4x4_blocks_wide_txsize_lookup[tx_size]; ++i) {
- a[i] = a[0];
- }
- for (i = 0; i < num_4x4_blocks_high_txsize_lookup[tx_size]; ++i) {
- l[i] = l[0];
- }
+ for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) a[i] = a[0];
+
+ for (i = 0; i < tx_size_high_unit[tx_size]; ++i) l[i] = l[0];
#endif
if (p->eobs[block]) *(args->skip) = 0;
@@ -885,25 +882,23 @@
if (tx_size == plane_tx_size) {
encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
} else {
- int bsl = b_width_log2_lookup[bsize];
+ int bsl = block_size_wide[bsize] >> (tx_size_wide_log2[0] + 1);
int i;
-
assert(bsl > 0);
- --bsl;
-
#if CONFIG_EXT_TX
assert(tx_size < TX_SIZES);
#endif // CONFIG_EXT_TX
for (i = 0; i < 4; ++i) {
- const int offsetr = blk_row + ((i >> 1) << bsl);
- const int offsetc = blk_col + ((i & 0x01) << bsl);
- int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
+ const int offsetr = blk_row + ((i >> 1) * bsl);
+ const int offsetc = blk_col + ((i & 0x01) * bsl);
+ const TX_SIZE sub_txs = tx_size - 1;
+ int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
encode_block_inter(plane, block + i * step, offsetr, offsetc, plane_bsize,
- tx_size - 1, arg);
+ sub_txs, arg);
}
}
}
@@ -983,14 +978,15 @@
// TODO(jingning): Clean this up.
const struct macroblockd_plane *const pd = &xd->plane[plane];
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
- const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
- const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
+ const int mi_height = block_size_high[plane_bsize] >> tx_size_wide_log2[0];
const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
- const int bh = num_4x4_blocks_wide_lookup[txb_size];
+ const int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0];
+ const int bh = block_size_high[txb_size] >> tx_size_wide_log2[0];
int idx, idy;
int block = 0;
- int step = num_4x4_blocks_txsize_lookup[max_tx_size];
+ int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
av1_get_entropy_contexts(bsize, TX_4X4, pd, ctx.ta[plane], ctx.tl[plane]);
#else
const struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -1009,7 +1005,7 @@
} else {
#endif
for (idy = 0; idy < mi_height; idy += bh) {
- for (idx = 0; idx < mi_width; idx += bh) {
+ for (idx = 0; idx < mi_width; idx += bw) {
encode_block_inter(plane, block, idy, idx, plane_bsize, max_tx_size,
&arg);
block += step;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 629eb46..03f6ffd 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3596,7 +3596,7 @@
recon_err = aom_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
if (cpi->twopass.total_left_stats.coded_error != 0.0)
- fprintf(f, "%10u %dx%d %10d %10d %d %d %10d %10d %10d %10d"
+ fprintf(f, "%10u %dx%d %d %d %10d %10d %10d %10d"
"%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" "
"%10"PRId64" %10"PRId64" %10d "
"%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
@@ -3605,8 +3605,6 @@
"%10lf %8u %10"PRId64" %10d %10d %10d\n",
cpi->common.current_video_frame,
cm->width, cm->height,
- cpi->td.rd_counts.m_search_count,
- cpi->td.rd_counts.ex_search_count,
cpi->rc.source_alt_ref_pending,
cpi->rc.source_alt_ref_active,
cpi->rc.this_frame_target,
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 8738609..00abc71 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -268,13 +268,13 @@
TileInfo tile_info;
int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
int mode_map[BLOCK_SIZES][MAX_MODES];
+ int m_search_count;
+ int ex_search_count;
} TileDataEnc;
typedef struct RD_COUNTS {
av1_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
int64_t comp_pred_diff[REFERENCE_MODES];
- int m_search_count;
- int ex_search_count;
} RD_COUNTS;
typedef struct ThreadData {
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 117d0ed..5876d15 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -27,10 +27,6 @@
for (n = 0; n < ENTROPY_TOKENS; n++)
td->rd_counts.coef_counts[i][j][k][l][m][n] +=
td_t->rd_counts.coef_counts[i][j][k][l][m][n];
-
- // Counts of all motion searches and exhuastive mesh searches.
- td->rd_counts.m_search_count += td_t->rd_counts.m_search_count;
- td->rd_counts.ex_search_count += td_t->rd_counts.ex_search_count;
}
static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 5015837..cfc8cb3 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -110,7 +110,7 @@
}
#endif // CONFIG_PALETTE
- for (i = 0; i < TX_SIZES - 1; ++i)
+ for (i = 0; i < MAX_TX_DEPTH; ++i)
for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
av1_cost_tokens(cpi->tx_size_cost[i][j], fc->tx_size_probs[i][j],
av1_tx_size_tree[i]);
@@ -720,30 +720,41 @@
#if CONFIG_DUAL_FILTER
int av1_get_switchable_rate(const AV1_COMP *cpi, const MACROBLOCKD *const xd) {
- const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- int inter_filter_cost = 0;
- int dir;
+ const AV1_COMMON *const cm = &cpi->common;
+ if (cm->interp_filter == SWITCHABLE) {
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ int inter_filter_cost = 0;
+ int dir;
- for (dir = 0; dir < 2; ++dir) {
- if (has_subpel_mv_component(xd->mi[0], xd, dir) ||
- (mbmi->ref_frame[1] > INTRA_FRAME &&
- has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
- const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
- inter_filter_cost +=
- cpi->switchable_interp_costs[ctx][mbmi->interp_filter[dir]];
+ for (dir = 0; dir < 2; ++dir) {
+ if (has_subpel_mv_component(xd->mi[0], xd, dir) ||
+ (mbmi->ref_frame[1] > INTRA_FRAME &&
+ has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
+ const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
+ inter_filter_cost +=
+ cpi->switchable_interp_costs[ctx][mbmi->interp_filter[dir]];
+ }
}
+ return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
+ } else {
+ return 0;
}
- return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
}
#else
int av1_get_switchable_rate(const AV1_COMP *cpi, const MACROBLOCKD *const xd) {
- const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- const int ctx = av1_get_pred_context_switchable_interp(xd);
+ const AV1_COMMON *const cm = &cpi->common;
+ if (cm->interp_filter == SWITCHABLE) {
#if CONFIG_EXT_INTERP
- if (!av1_is_interp_needed(xd)) return 0;
-#endif // CONFIG_EXT_INTERP
- return SWITCHABLE_INTERP_RATE_FACTOR *
- cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
+ if (av1_is_interp_needed(xd))
+#endif
+ {
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const int ctx = av1_get_pred_context_switchable_interp(xd);
+ return SWITCHABLE_INTERP_RATE_FACTOR *
+ cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
+ }
+ }
+ return 0;
}
#endif
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 76d471e..c6b9979 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1318,9 +1318,9 @@
const int tx_size_cat =
is_inter ? inter_tx_size_cat_lookup[bs] : intra_tx_size_cat_lookup[bs];
const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
+ const int depth = tx_size_to_depth(coded_tx_size);
const int tx_select = cm->tx_mode == TX_MODE_SELECT;
- const int r_tx_size =
- cpi->tx_size_cost[tx_size_cat][tx_size_ctx][coded_tx_size];
+ const int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
assert(skip_prob > 0);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -2746,7 +2746,7 @@
// not the tokenonly rate.
this_rate_tokenonly -=
cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)]
- [mic->mbmi.tx_size];
+ [tx_size_to_depth(mic->mbmi.tx_size)];
}
#if CONFIG_PALETTE
if (cpi->common.allow_screen_content_tools && mic->mbmi.mode == DC_PRED)
@@ -3222,7 +3222,8 @@
int bh = num_4x4_blocks_wide_lookup[txb_size];
int idx, idy;
int block = 0;
- int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
+ int step = tx_size_wide_unit[max_txsize_lookup[plane_bsize]] *
+ tx_size_high_unit[max_txsize_lookup[plane_bsize]];
ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
TXFM_CONTEXT tx_above[MAX_MIB_SIZE];
@@ -3519,6 +3520,7 @@
coeff_ctx = combine_entropy_contexts(ta[0], tl[0]);
av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
plane_bsize, coeff_ctx, rate, dist, bsse, skip);
+
for (i = 0; i < num_4x4_blocks_wide_txsize_lookup[tx_size]; ++i)
ta[i] = !(p->eobs[block] == 0);
for (i = 0; i < num_4x4_blocks_high_txsize_lookup[tx_size]; ++i)
@@ -6367,10 +6369,6 @@
const int this_mode = mbmi->mode;
int refs[2] = { mbmi->ref_frame[0],
(mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
-#if CONFIG_DUAL_FILTER
- (void)pred_filter_search;
- return SWITCHABLE;
-#else
if (pred_filter_search) {
InterpFilter af = SWITCHABLE, lf = SWITCHABLE;
if (xd->up_available) af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
@@ -6385,7 +6383,6 @@
#endif // CONFIG_EXT_INTER
best_filter = af;
}
-#endif
if (is_comp_pred) {
if (cpi->sf.adaptive_mode_search) {
#if CONFIG_EXT_INTER
@@ -6448,15 +6445,8 @@
#endif // CONFIG_EXT_INTER
}
}
- if (cm->interp_filter != BILINEAR) {
- if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
- best_filter = EIGHTTAP_REGULAR;
- }
-#if CONFIG_EXT_INTERP
- else if (!av1_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE) {
- best_filter = EIGHTTAP_REGULAR;
- }
-#endif
+ if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
+ best_filter = EIGHTTAP_REGULAR;
}
return best_filter;
}
@@ -6676,6 +6666,7 @@
int_mv cur_mv[2];
int rate_mv = 0;
#if CONFIG_EXT_INTER
+ int pred_exists = 1;
const int bw = 4 * num_4x4_blocks_wide_lookup[bsize];
int mv_idx = (this_mode == NEWFROMNEARMV) ? 1 : 0;
int_mv single_newmv[TOTAL_REFS_PER_FRAME];
@@ -6706,6 +6697,7 @@
uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
#endif // CONFIG_VAR_TX
int64_t best_distortion = INT64_MAX;
+ int64_t best_rd = INT64_MAX;
MB_MODE_INFO best_mbmi;
#if CONFIG_EXT_INTER
int rate2_bmc_nocoeff;
@@ -6713,24 +6705,14 @@
MB_MODE_INFO best_bmc_mbmi;
#endif // CONFIG_EXT_INTER
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-
- int pred_exists = 0;
- int intpel_mv;
- int64_t rd, tmp_rd, best_rd = INT64_MAX;
- int best_needs_copy = 0;
+ int64_t rd = INT64_MAX;
+ int64_t tmp_rd = INT64_MAX;
uint8_t *orig_dst[MAX_MB_PLANE];
int orig_dst_stride[MAX_MB_PLANE];
+ uint8_t *tmp_dst[MAX_MB_PLANE];
+ int tmp_dst_stride[MAX_MB_PLANE];
int rs = 0;
-#if CONFIG_DUAL_FILTER
- // Index use case:
- // {0, 1} -> (vertical, horizontal) filter types for the first ref frame
- // {2, 3} -> (vertical, horizontal) filter types for the second ref frame
- InterpFilter best_filter[4] = {
- SWITCHABLE, SWITCHABLE, SWITCHABLE, SWITCHABLE,
- };
-#else
- InterpFilter best_filter = SWITCHABLE;
-#endif
+ InterpFilter assign_filter = SWITCHABLE;
int skip_txfm_sb = 0;
int64_t skip_sse_sb = INT64_MAX;
@@ -6966,6 +6948,10 @@
// one for future predictions. In the end, copy from tmp_buf to
// dst if necessary.
for (i = 0; i < MAX_MB_PLANE; i++) {
+ tmp_dst[i] = tmp_buf + i * MAX_SB_SQUARE;
+ tmp_dst_stride[i] = MAX_SB_SIZE;
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
orig_dst[i] = xd->plane[i].dst.buf;
orig_dst_stride[i] = xd->plane[i].dst.stride;
}
@@ -7003,135 +6989,125 @@
)
return INT64_MAX;
- pred_exists = 0;
- // Are all MVs integer pel for Y and UV
- intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
- if (is_comp_pred) intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
-
+ if (cm->interp_filter == SWITCHABLE) {
#if !CONFIG_DUAL_FILTER
- best_filter =
- predict_interp_filter(cpi, x, bsize, mi_row, mi_col, single_filter);
+ assign_filter =
+ predict_interp_filter(cpi, x, bsize, mi_row, mi_col, single_filter);
#endif
+#if CONFIG_EXT_INTERP || CONFIG_DUAL_FILTER
+ if (!av1_is_interp_needed(xd)) assign_filter = EIGHTTAP_REGULAR;
+#endif
+ } else {
+ assign_filter = cm->interp_filter;
+ }
- if (cm->interp_filter != BILINEAR) {
- int newbest;
- int tmp_rate_sum = 0;
- int64_t tmp_dist_sum = 0;
-
+ { // Do the interpolation filter search inside this braced block
+ int tmp_rate;
+ int64_t tmp_dist;
#if CONFIG_DUAL_FILTER
- for (i = 0; i < SWITCHABLE_FILTERS * SWITCHABLE_FILTERS; ++i)
+ mbmi->interp_filter[0] =
+ assign_filter == SWITCHABLE ? EIGHTTAP_REGULAR : assign_filter;
+ mbmi->interp_filter[1] =
+ assign_filter == SWITCHABLE ? EIGHTTAP_REGULAR : assign_filter;
+ mbmi->interp_filter[2] =
+ assign_filter == SWITCHABLE ? EIGHTTAP_REGULAR : assign_filter;
+ mbmi->interp_filter[3] =
+ assign_filter == SWITCHABLE ? EIGHTTAP_REGULAR : assign_filter;
#else
- for (i = 0; i < SWITCHABLE_FILTERS; ++i)
+ mbmi->interp_filter =
+ assign_filter == SWITCHABLE ? EIGHTTAP_REGULAR : assign_filter;
#endif
- {
- int j;
- int64_t rs_rd;
- int tmp_skip_sb = 0;
- int64_t tmp_skip_sse = INT64_MAX;
+ rs = av1_get_switchable_rate(cpi, xd);
+ av1_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
+ &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
+ rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
+ if (assign_filter == SWITCHABLE) {
+ // do interp_filter search
+ if (av1_is_interp_needed(xd)) {
+ int best_in_temp = 0;
#if CONFIG_DUAL_FILTER
- mbmi->interp_filter[0] = filter_sets[i][0];
- mbmi->interp_filter[1] = filter_sets[i][1];
- mbmi->interp_filter[2] = filter_sets[i][0];
- mbmi->interp_filter[3] = filter_sets[i][1];
+ InterpFilter best_filter[4];
+ av1_copy(best_filter, mbmi->interp_filter);
#else
- mbmi->interp_filter = i;
+ InterpFilter best_filter = mbmi->interp_filter;
#endif
- rs = av1_get_switchable_rate(cpi, xd);
- rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
+ restore_dst_buf(xd, tmp_dst, tmp_dst_stride);
+#if CONFIG_DUAL_FILTER
+ // EIGHTTAP_REGULAR mode is calculated beforehand
+ for (i = 1; i < SWITCHABLE_FILTERS * SWITCHABLE_FILTERS; ++i)
+#else
+ // EIGHTTAP_REGULAR mode is calculated beforehand
+ for (i = 1; i < SWITCHABLE_FILTERS; ++i)
+#endif
+ {
+ int tmp_skip_sb = 0;
+ int64_t tmp_skip_sse = INT64_MAX;
+ int tmp_rs;
+#if CONFIG_DUAL_FILTER
+ mbmi->interp_filter[0] = filter_sets[i][0];
+ mbmi->interp_filter[1] = filter_sets[i][1];
+ mbmi->interp_filter[2] = filter_sets[i][0];
+ mbmi->interp_filter[3] = filter_sets[i][1];
+#else
+ mbmi->interp_filter = i;
+#endif
+ tmp_rs = av1_get_switchable_rate(cpi, xd);
+ av1_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
+ &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
+ tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rs + tmp_rate, tmp_dist);
- if (i > 0 && intpel_mv && IsInterpolatingFilter(i)) {
- rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
- if (cm->interp_filter == SWITCHABLE) rd += rs_rd;
- } else {
- int rate_sum = 0;
- int64_t dist_sum = 0;
- if ((cm->interp_filter == SWITCHABLE && (!i || best_needs_copy)) ||
-#if CONFIG_EXT_INTER
- is_comp_interintra_pred ||
-#endif // CONFIG_EXT_INTER
- (cm->interp_filter != SWITCHABLE &&
- (
+ if (tmp_rd < rd) {
+ rd = tmp_rd;
+ rs = av1_get_switchable_rate(cpi, xd);
#if CONFIG_DUAL_FILTER
- cm->interp_filter == mbmi->interp_filter[0]
+ av1_copy(best_filter, mbmi->interp_filter);
#else
- cm->interp_filter == mbmi->interp_filter
+ best_filter = mbmi->interp_filter;
#endif
- || (i == 0 && intpel_mv && IsInterpolatingFilter(i))))) {
- restore_dst_buf(xd, orig_dst, orig_dst_stride);
- } else {
- for (j = 0; j < MAX_MB_PLANE; j++) {
- xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE;
- xd->plane[j].dst.stride = MAX_SB_SIZE;
+ skip_txfm_sb = tmp_skip_sb;
+ skip_sse_sb = tmp_skip_sse;
+ best_in_temp = !best_in_temp;
+ if (best_in_temp) {
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
+ } else {
+ restore_dst_buf(xd, tmp_dst, tmp_dst_stride);
+ }
}
}
- av1_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
- model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &rate_sum,
- &dist_sum, &tmp_skip_sb, &tmp_skip_sse);
-
- rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
- if (cm->interp_filter == SWITCHABLE) rd += rs_rd;
-
- if (i == 0 && intpel_mv && IsInterpolatingFilter(i)) {
- tmp_rate_sum = rate_sum;
- tmp_dist_sum = dist_sum;
+ if (best_in_temp) {
+ restore_dst_buf(xd, tmp_dst, tmp_dst_stride);
+ } else {
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
}
- }
- newbest = i == 0 || rd < best_rd;
-
- if (newbest) {
- best_rd = rd;
#if CONFIG_DUAL_FILTER
- best_filter[0] = mbmi->interp_filter[0];
- best_filter[1] = mbmi->interp_filter[1];
- best_filter[2] = mbmi->interp_filter[2];
- best_filter[3] = mbmi->interp_filter[3];
+ av1_copy(mbmi->interp_filter, best_filter);
#else
- best_filter = mbmi->interp_filter;
+ mbmi->interp_filter = best_filter;
#endif
- if (cm->interp_filter == SWITCHABLE && i &&
- !(intpel_mv && IsInterpolatingFilter(i)))
- best_needs_copy = !best_needs_copy;
- }
-
- if ((cm->interp_filter == SWITCHABLE && newbest) ||
- (cm->interp_filter != SWITCHABLE &&
-#if CONFIG_DUAL_FILTER
- cm->interp_filter == mbmi->interp_filter[0]
-#else
- cm->interp_filter == mbmi->interp_filter
-#endif
- )) {
- pred_exists = 1;
- tmp_rd = best_rd;
-
- skip_txfm_sb = tmp_skip_sb;
- skip_sse_sb = tmp_skip_sse;
} else {
- pred_exists = 0;
+#if !CONFIG_EXT_INTERP && !CONFIG_DUAL_FILTER
+ int tmp_rs;
+ InterpFilter best_filter = mbmi->interp_filter;
+ rs = av1_get_switchable_rate(cpi, xd);
+ for (i = 1; i < SWITCHABLE_FILTERS; ++i) {
+ mbmi->interp_filter = i;
+ tmp_rs = av1_get_switchable_rate(cpi, xd);
+ if (tmp_rs < rs) {
+ rs = tmp_rs;
+ best_filter = i;
+ }
+ }
+ mbmi->interp_filter = best_filter;
+#else
+ assert(0);
+#endif
}
}
- restore_dst_buf(xd, orig_dst, orig_dst_stride);
}
-// Set the appropriate filter
-#if CONFIG_DUAL_FILTER
- mbmi->interp_filter[0] =
- cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter[0];
- mbmi->interp_filter[1] =
- cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter[1];
- if (mbmi->ref_frame[1] > INTRA_FRAME) {
- mbmi->interp_filter[2] =
- cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter[2];
- mbmi->interp_filter[3] =
- cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter[3];
- }
-#else
- mbmi->interp_filter =
- cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter;
-#endif
- rs = cm->interp_filter == SWITCHABLE ? av1_get_switchable_rate(cpi, xd) : 0;
-
#if CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
best_bmc_mbmi = *mbmi;
@@ -7432,29 +7408,15 @@
pred_exists = 0;
}
#endif // CONFIG_EXT_INTERP
-#endif // CONFIG_EXT_INTER
-
- if (pred_exists) {
- if (best_needs_copy) {
- // again temporarily set the buffers to local memory to prevent a memcpy
- for (i = 0; i < MAX_MB_PLANE; i++) {
- xd->plane[i].dst.buf = tmp_buf + i * MAX_SB_SQUARE;
- xd->plane[i].dst.stride = MAX_SB_SIZE;
- }
- }
- rd = tmp_rd;
- } else {
+ if (pred_exists == 0) {
int tmp_rate;
int64_t tmp_dist;
-
- // Handles the special case when a filter that is not in the
- // switchable list (ex. bilinear) is indicated at the frame level, or
- // skip condition holds.
av1_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
&tmp_dist, &skip_txfm_sb, &skip_sse_sb);
rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
}
+#endif // CONFIG_EXT_INTER
#if CONFIG_DUAL_FILTER
if (!is_comp_pred) single_filter[this_mode][refs[0]] = mbmi->interp_filter[0];
@@ -8054,7 +8016,7 @@
// (prediction granularity), so we account for it in the full rate,
// not the tokenonly rate.
rate_y -= cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)]
- [mbmi->tx_size];
+ [tx_size_to_depth(mbmi->tx_size)];
}
rate2 += av1_cost_bit(cm->fc->filter_intra_probs[0],
@@ -8811,8 +8773,9 @@
// tokenonly rate, but for intra blocks, tx_size is always coded
// (prediction granularity), so we account for it in the full rate,
// not the tokenonly rate.
- rate_y -= cpi->tx_size_cost[max_tx_size - TX_8X8]
- [get_tx_size_context(xd)][mbmi->tx_size];
+ rate_y -=
+ cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)]
+ [tx_size_to_depth(mbmi->tx_size)];
}
#if CONFIG_EXT_INTRA
if (is_directional_mode) {
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 43ed837..47cc02a 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -375,7 +375,7 @@
static INLINE int get_tx_eob(const struct segmentation *seg, int segment_id,
TX_SIZE tx_size) {
- const int eob_max = num_4x4_blocks_txsize_lookup[tx_size] << 4;
+ const int eob_max = tx_size_2d[tx_size];
return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
}
@@ -574,15 +574,19 @@
const int tx_col = blk_col >> (1 - pd->subsampling_x);
TX_SIZE plane_tx_size;
- int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
- int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+ int max_blocks_high = block_size_high[plane_bsize];
+ int max_blocks_wide = block_size_wide[plane_bsize];
assert(tx_size < TX_SIZES);
if (xd->mb_to_bottom_edge < 0)
- max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y);
if (xd->mb_to_right_edge < 0)
- max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+ max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x);
+
+ // Scale to the transform block unit.
+ max_blocks_high >>= tx_size_wide_log2[0];
+ max_blocks_wide >>= tx_size_wide_log2[0];
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
@@ -600,20 +604,23 @@
else if (dry_run == DRY_RUN_COSTCOEFFS)
cost_coeffs_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
} else {
- int bsl = b_width_log2_lookup[bsize];
+ // Half the block size, in transform block units.
+ int bsl = block_size_wide[bsize] >> (tx_size_wide_log2[0] + 1);
int i;
assert(bsl > 0);
- --bsl;
for (i = 0; i < 4; ++i) {
- const int offsetr = blk_row + ((i >> 1) << bsl);
- const int offsetc = blk_col + ((i & 0x01) << bsl);
- int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
+ const int offsetr = blk_row + ((i >> 1) * bsl);
+ const int offsetc = blk_col + ((i & 0x01) * bsl);
+
+ // TODO(jingning): Fix this tx_size transition.
+ const TX_SIZE sub_txs = tx_size - 1;
+ int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
- tokenize_vartx(td, t, dry_run, tx_size - 1, plane_bsize, offsetr, offsetc,
+ tokenize_vartx(td, t, dry_run, sub_txs, plane_bsize, offsetr, offsetc,
block + i * step, plane, arg);
}
}
@@ -649,16 +656,17 @@
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
- const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
- const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
+ const int mi_height = block_size_high[plane_bsize] >> tx_size_wide_log2[0];
const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
- int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0];
+ int bh = block_size_high[txb_size] >> tx_size_wide_log2[0];
int idx, idy;
int block = 0;
- int step = num_4x4_blocks_txsize_lookup[max_tx_size];
+ int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
for (idy = 0; idy < mi_height; idy += bh) {
- for (idx = 0; idx < mi_width; idx += bh) {
+ for (idx = 0; idx < mi_width; idx += bw) {
tokenize_vartx(td, t, dry_run, max_tx_size, plane_bsize, idy, idx,
block, plane, &arg);
block += step;
diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc
index e4179ef..cb2fbd5 100644
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -436,6 +436,15 @@
&aom_idct32x32_1024_add_sse2, 1, AOM_BITS_8)));
#endif // HAVE_AVX2 && !CONFIG_AOM_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_AVX2 && CONFIG_AOM_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+ AVX2, Trans32x32Test,
+ ::testing::Values(make_tuple(&aom_fdct32x32_avx2,
+ &aom_idct32x32_1024_add_sse2, 0, AOM_BITS_8),
+ make_tuple(&aom_fdct32x32_rd_avx2,
+ &aom_idct32x32_1024_add_sse2, 1, AOM_BITS_8)));
+#endif // HAVE_AVX2 && CONFIG_AOM_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
#if HAVE_MSA && !CONFIG_AOM_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
MSA, Trans32x32Test,
diff --git a/test/ethread_test.cc b/test/ethread_test.cc
index 6b2f1ea..c72f16d 100644
--- a/test/ethread_test.cc
+++ b/test/ethread_test.cc
@@ -89,6 +89,7 @@
encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
encoder->Control(AOME_SET_ARNR_TYPE, 3);
+ encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 0);
} else {
encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0);
encoder->Control(AV1E_SET_AQ_MODE, 3);
@@ -180,5 +181,5 @@
AV1_INSTANTIATE_TEST_CASE(AVxEncoderThreadTestLarge,
::testing::Values(::libaom_test::kTwoPassGood,
::libaom_test::kOnePassGood),
- ::testing::Range(1, 3));
+ ::testing::Range(0, 3));
} // namespace
diff --git a/test/fht32x32_test.cc b/test/fht32x32_test.cc
index 1f85761..8545b2c 100644
--- a/test/fht32x32_test.cc
+++ b/test/fht32x32_test.cc
@@ -90,12 +90,11 @@
IhtFunc inv_txfm_;
};
-// TODO(luoyi): Owing to the range check in DCT_DCT of av1_fht32x32_avx2, as
-// input is out of the range, we use aom_fdct32x32_avx2. However this function
-// does not support CONFIG_AOM_HIGHBITDEPTH. I need to fix the scaling/rounding
-// of av1_fht32x32_avx2 then add this test on CONFIG_AOM_HIGHBITDEPTH.
-#if !CONFIG_AOM_HIGHBITDEPTH
TEST_P(AV1Trans32x32HT, CoeffCheck) { RunCoeffCheck(); }
+// TODO(luoyi): When CONFIG_AOM_HIGHBITDEPTH = 1, our AVX2 implementation of
+// av1_fht32x32 does not support tran_low_t (int32_t) as an intermediate result.
+// Therefore the MemCheck test cannot pass yet for tx_type = 1, 2, ..., 8.
+#if !CONFIG_AOM_HIGHBITDEPTH
TEST_P(AV1Trans32x32HT, MemCheck) { RunMemCheck(); }
#endif